summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--apps/codecs/libtremor/asm_arm.h168
-rw-r--r--apps/codecs/libtremor/asm_mcf5249.h32
-rw-r--r--apps/codecs/libtremor/block.c181
-rw-r--r--apps/codecs/libtremor/config-tremor.h8
-rw-r--r--apps/codecs/libtremor/ffmpeg_render_line.h71
-rw-r--r--apps/codecs/libtremor/ivorbiscodec.h16
-rw-r--r--apps/codecs/libtremor/mapping0.c28
-rw-r--r--apps/codecs/libtremor/misc.h27
-rw-r--r--apps/codecs/libtremor/synthesis.c20
-rw-r--r--apps/codecs/libtremor/window.c4
-rw-r--r--apps/codecs/libtremor/window.h4
11 files changed, 143 insertions, 416 deletions
diff --git a/apps/codecs/libtremor/asm_arm.h b/apps/codecs/libtremor/asm_arm.h
index eb0d3ca789..42f82ec088 100644
--- a/apps/codecs/libtremor/asm_arm.h
+++ b/apps/codecs/libtremor/asm_arm.h
@@ -17,174 +17,6 @@
17 17
18#ifdef _ARM_ASSEM_ 18#ifdef _ARM_ASSEM_
19 19
20#if !defined(_V_WIDE_MATH) && !defined(_LOW_ACCURACY_)
21#define _V_WIDE_MATH
22#ifndef _TREMOR_VECT_OPS
23#define _TREMOR_VECT_OPS
24/* asm versions of vector operations for block.c, window.c */
25/* SOME IMPORTANT NOTES: this implementation of vect_mult_bw does
26 NOT do a final shift, meaning that the result of vect_mult_bw is
27 only 31 bits not 32. This is so that we can do the shift in-place
28 in vect_add_xxxx instead to save one instruction for each mult on arm */
29static inline
30void vect_add_right_left(ogg_int32_t *x, const ogg_int32_t *y, int n)
31{
32 /* first arg is right subframe of previous frame and second arg
33 is left subframe of current frame. overlap left onto right overwriting
34 the right subframe */
35
36 do{
37 asm volatile (
38 "ldmia %[x], {r0, r1, r2, r3};"
39 "ldmia %[y]!, {r4, r5, r6, r7};"
40 "add r0, r4, r0, lsl #1;"
41 "add r1, r5, r1, lsl #1;"
42 "add r2, r6, r2, lsl #1;"
43 "add r3, r7, r3, lsl #1;"
44 "stmia %[x]!, {r0, r1, r2, r3};"
45 "ldmia %[x], {r0, r1, r2, r3};"
46 "ldmia %[y]!, {r4, r5, r6, r7};"
47 "add r0, r4, r0, lsl #1;"
48 "add r1, r5, r1, lsl #1;"
49 "add r2, r6, r2, lsl #1;"
50 "add r3, r7, r3, lsl #1;"
51 "stmia %[x]!, {r0, r1, r2, r3};"
52 : [x] "+r" (x), [y] "+r" (y)
53 : : "r0", "r1", "r2", "r3",
54 "r4", "r5", "r6", "r7",
55 "memory");
56 n -= 8;
57 } while (n);
58}
59
60static inline
61void vect_add_left_right(ogg_int32_t *x, const ogg_int32_t *y, int n)
62{
63 /* first arg is left subframe of current frame and second arg
64 is right subframe of previous frame. overlap right onto left overwriting
65 the LEFT subframe */
66 do{
67 asm volatile (
68 "ldmia %[x], {r0, r1, r2, r3};"
69 "ldmia %[y]!, {r4, r5, r6, r7};"
70 "add r0, r0, r4, lsl #1;"
71 "add r1, r1, r5, lsl #1;"
72 "add r2, r2, r6, lsl #1;"
73 "add r3, r3, r7, lsl #1;"
74 "stmia %[x]!, {r0, r1, r2, r3};"
75 "ldmia %[x], {r0, r1, r2, r3};"
76 "ldmia %[y]!, {r4, r5, r6, r7};"
77 "add r0, r0, r4, lsl #1;"
78 "add r1, r1, r5, lsl #1;"
79 "add r2, r2, r6, lsl #1;"
80 "add r3, r3, r7, lsl #1;"
81 "stmia %[x]!, {r0, r1, r2, r3};"
82 : [x] "+r" (x), [y] "+r" (y)
83 : : "r0", "r1", "r2", "r3",
84 "r4", "r5", "r6", "r7",
85 "memory");
86 n -= 8;
87 } while (n);
88}
89
90#if ARM_ARCH >= 6
91static inline
92void ogg_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
93{
94 /* Note, mult_fw uses MULT31 */
95 do{
96 asm volatile (
97 "ldmia %[d], {r0, r1, r2, r3};"
98 "ldmia %[w]!, {r4, r5, r6, r7};"
99 "smmul r0, r4, r0;"
100 "smmul r1, r5, r1;"
101 "smmul r2, r6, r2;"
102 "smmul r3, r7, r3;"
103 "mov r0, r0, lsl #1;"
104 "mov r1, r1, lsl #1;"
105 "mov r2, r2, lsl #1;"
106 "mov r3, r3, lsl #1;"
107 "stmia %[d]!, {r0, r1, r2, r3};"
108 : [d] "+r" (data), [w] "+r" (window)
109 : : "r0", "r1", "r2", "r3",
110 "r4", "r5", "r6", "r7",
111 "memory" );
112 n -= 4;
113 } while (n);
114}
115#else
116static inline
117void ogg_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
118{
119 /* Note, mult_fw uses MULT31 */
120 do{
121 asm volatile (
122 "ldmia %[d], {r0, r1, r2, r3};"
123 "ldmia %[w]!, {r4, r5, r6, r7};"
124 "smull r8, r0, r4, r0;"
125 "mov r0, r0, lsl #1;"
126 "smull r8, r1, r5, r1;"
127 "mov r1, r1, lsl #1;"
128 "smull r8, r2, r6, r2;"
129 "mov r2, r2, lsl #1;"
130 "smull r8, r3, r7, r3;"
131 "mov r3, r3, lsl #1;"
132 "stmia %[d]!, {r0, r1, r2, r3};"
133 : [d] "+r" (data), [w] "+r" (window)
134 : : "r0", "r1", "r2", "r3",
135 "r4", "r5", "r6", "r7", "r8",
136 "memory" );
137 n -= 4;
138 } while (n);
139}
140#endif
141
142#if ARM_ARCH >= 6
143static inline
144void ogg_vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
145{
146 /* NOTE mult_bw uses MULT_32 i.e. doesn't shift result left at end */
147 /* On ARM, we can do the shift at the same time as the overlap-add */
148 do{
149 asm volatile ("ldmia %[d], {r0, r1, r2, r3};"
150 "ldmda %[w]!, {r4, r5, r6, r7};"
151 "smmul r0, r7, r0;"
152 "smmul r1, r6, r1;"
153 "smmul r2, r5, r2;"
154 "smmul r3, r4, r3;"
155 "stmia %[d]!, {r0, r1, r2, r3};"
156 : [d] "+r" (data), [w] "+r" (window)
157 : : "r0", "r1", "r2", "r3",
158 "r4", "r5", "r6", "r7",
159 "memory" );
160 n -= 4;
161 } while (n);
162}
163#else
164static inline
165void ogg_vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
166{
167 /* NOTE mult_bw uses MULT_32 i.e. doesn't shift result left at end */
168 /* On ARM, we can do the shift at the same time as the overlap-add */
169 do{
170 asm volatile ("ldmia %[d], {r0, r1, r2, r3};"
171 "ldmda %[w]!, {r4, r5, r6, r7};"
172 "smull r8, r0, r7, r0;"
173 "smull r7, r1, r6, r1;"
174 "smull r6, r2, r5, r2;"
175 "smull r5, r3, r4, r3;"
176 "stmia %[d]!, {r0, r1, r2, r3};"
177 : [d] "+r" (data), [w] "+r" (window)
178 : : "r0", "r1", "r2", "r3",
179 "r4", "r5", "r6", "r7", "r8",
180 "memory" );
181 n -= 4;
182 } while (n);
183}
184#endif
185#endif
186#endif
187
188#ifndef _V_LSP_MATH_ASM 20#ifndef _V_LSP_MATH_ASM
189#define _V_LSP_MATH_ASM 21#define _V_LSP_MATH_ASM
190 22
diff --git a/apps/codecs/libtremor/asm_mcf5249.h b/apps/codecs/libtremor/asm_mcf5249.h
index 3e7d46983e..66de07615f 100644
--- a/apps/codecs/libtremor/asm_mcf5249.h
+++ b/apps/codecs/libtremor/asm_mcf5249.h
@@ -28,37 +28,5 @@
28 28
29#define MB() 29#define MB()
30 30
31#ifndef _TREMOR_VECT_OPS
32#define _TREMOR_VECT_OPS
33static inline
34void vect_add_left_right(ogg_int32_t *x, const ogg_int32_t *y, int n)
35{
36 /* coldfire asm has symmetrical versions of vect_add_right_left
37 and vect_add_left_right (since symmetrical versions of
38 vect_mult_fw and vect_mult_bw i.e. both use MULT31) */
39 vect_add(x, y, n );
40}
41
42static inline
43void vect_add_right_left(ogg_int32_t *x, const ogg_int32_t *y, int n)
44{
45 /* coldfire asm has symmetrical versions of vect_add_right_left
46 and vect_add_left_right (since symmetrical versions of
47 vect_mult_fw and vect_mult_bw i.e. both use MULT31) */
48 vect_add(x, y, n );
49}
50
51static inline
52void ogg_vect_mult_fw(int32_t *data, const int32_t *window, int n)
53{
54 vect_mult_fw(data, window, n);
55}
56
57static inline
58void ogg_vect_mult_bw(int32_t *data, const int32_t *window, int n)
59{
60 vect_mult_bw(data, window, n);
61}
62#endif
63#endif 31#endif
64#endif 32#endif
diff --git a/apps/codecs/libtremor/block.c b/apps/codecs/libtremor/block.c
index d678719cbe..b7ce5adc4e 100644
--- a/apps/codecs/libtremor/block.c
+++ b/apps/codecs/libtremor/block.c
@@ -25,6 +25,7 @@
25#include "window.h" 25#include "window.h"
26#include "registry.h" 26#include "registry.h"
27#include "misc.h" 27#include "misc.h"
28#include "ffmpeg_render_line.h"
28//#include <codecs/lib/codeclib.h> 29//#include <codecs/lib/codeclib.h>
29 30
30static int ilog(unsigned int v){ 31static int ilog(unsigned int v){
@@ -37,11 +38,10 @@ static int ilog(unsigned int v){
37 return(ret); 38 return(ret);
38} 39}
39 40
40static ogg_int32_t* _pcmp [CHANNELS] IBSS_ATTR;
41static ogg_int32_t* _pcmbp[CHANNELS] IBSS_ATTR; 41static ogg_int32_t* _pcmbp[CHANNELS] IBSS_ATTR;
42static ogg_int32_t* _pcmret[CHANNELS] IBSS_ATTR; 42static ogg_int32_t* _pcmret[CHANNELS] IBSS_ATTR;
43/* save original pointers returned by malloc so we can free it easily */ 43/* save original pointers returned by malloc so we can free it easily */
44static ogg_int32_t* pcm_copy[CHANNELS] = {NULL}; 44static ogg_int32_t* malloc_pointers[3] = {NULL};
45 45
46/* pcm accumulator examples (not exhaustive): 46/* pcm accumulator examples (not exhaustive):
47 47
@@ -153,14 +153,13 @@ int vorbis_block_clear(vorbis_block *vb){
153static int _vds_init(vorbis_dsp_state *v,vorbis_info *vi){ 153static int _vds_init(vorbis_dsp_state *v,vorbis_info *vi){
154 int i; 154 int i;
155 long b_size[2]; 155 long b_size[2];
156 156
157 codec_setup_info *ci=(codec_setup_info *)vi->codec_setup; 157 codec_setup_info *ci=(codec_setup_info *)vi->codec_setup;
158 private_state *b=NULL; 158 private_state *b=NULL;
159 159
160 if(ci==NULL) return 1; 160 if(ci==NULL) return 1;
161 161
162 memset(v,0,sizeof(*v)); 162 memset(v,0,sizeof(*v));
163 v->reset_pcmb=true;
164 b=(private_state *)(v->backend_state=_ogg_calloc(1,sizeof(*b))); 163 b=(private_state *)(v->backend_state=_ogg_calloc(1,sizeof(*b)));
165 164
166 v->vi=vi; 165 v->vi=vi;
@@ -169,16 +168,42 @@ static int _vds_init(vorbis_dsp_state *v,vorbis_info *vi){
169#ifdef TREMOR_USE_IRAM 168#ifdef TREMOR_USE_IRAM
170 /* allocate IRAM buffer for the PCM data generated by synthesis */ 169 /* allocate IRAM buffer for the PCM data generated by synthesis */
171 iram_malloc_init(); 170 iram_malloc_init();
172 v->first_pcm = iram_malloc(vi->channels*ci->blocksizes[1]*sizeof(ogg_int32_t)); 171
173 /* when can't allocate IRAM buffer, allocate normal RAM buffer */ 172 v->floors = iram_malloc(vi->channels*ci->blocksizes[1]/2*sizeof(ogg_int32_t));
174 if(v->first_pcm == NULL) 173 v->residues[0] = iram_malloc(vi->channels*ci->blocksizes[1]/2*sizeof(ogg_int32_t));
174 /* if we can get away with it, put a double buffer into IRAM too, so that
175 overlap-add runs iram-to-iram and we avoid needing to memcpy */
176 v->residues[1] = iram_malloc(vi->channels*ci->blocksizes[1]/2*sizeof(ogg_int32_t));
177 if (v->residues[1] == NULL)
178 v->saved = iram_malloc(vi->channels*ci->blocksizes[1]/4*sizeof(ogg_int32_t));
179
175#endif 180#endif
176 { 181
177 pcm_copy[0] = _ogg_malloc(vi->channels*ci->blocksizes[1]*sizeof(ogg_int32_t)); 182 if (v->residues[0] == NULL) {
178 v->first_pcm = pcm_copy[0]; 183 malloc_pointers[0] = _ogg_malloc(vi->channels*ci->blocksizes[1]/2*sizeof(ogg_int32_t));
184 v->residues[0] = malloc_pointers[0];
185 }
186
187 if (v->residues[1] == NULL && v->saved == NULL) {
188 malloc_pointers[1] = _ogg_malloc(vi->channels*ci->blocksizes[1]/2*sizeof(ogg_int32_t));
189 v->residues[1] = malloc_pointers[1];
179 } 190 }
180 191
181 v->centerW=0; 192 if (v->floors == NULL) {
193 malloc_pointers[2] = _ogg_malloc(vi->channels*ci->blocksizes[1]/2*sizeof(ogg_int32_t));
194 v->floors = malloc_pointers[2];
195 }
196
197 /* needed for the first overlap/add */
198 if (v->saved) {
199 memset(v->saved, 0, vi->channels*ci->blocksizes[1]/4*sizeof(ogg_int32_t));
200 for (i = 0; i < vi->channels; i++)
201 v->saved_ptr[i] = v->saved + i*ci->blocksizes[1]/4;
202 } else {
203 memset(v->residues[1], 0, vi->channels*ci->blocksizes[1]/2*sizeof(ogg_int32_t));
204 for (i = 0; i < vi->channels; i++)
205 v->saved_ptr[i] = v->residues[1] + i*ci->blocksizes[1]/2;
206 }
182 207
183 /* Vorbis I uses only window type 0 */ 208 /* Vorbis I uses only window type 0 */
184 b_size[0]=ci->blocksizes[0]/2; 209 b_size[0]=ci->blocksizes[0]/2;
@@ -214,37 +239,13 @@ static int _vds_init(vorbis_dsp_state *v,vorbis_info *vi){
214 } 239 }
215 } 240 }
216 241
217 /* if we can get away with it, put a double buffer into IRAM too, so that
218 overlap-add runs iram-to-iram and we avoid needing to memcpy */
219 v->pcm_storage=ci->blocksizes[1]; 242 v->pcm_storage=ci->blocksizes[1];
220 v->pcm=_pcmp;
221 v->pcmret=_pcmret; 243 v->pcmret=_pcmret;
222 v->pcmb=_pcmbp; 244 v->pcmb=_pcmbp;
223 245
224 _pcmp[0]=NULL;
225 _pcmp[1]=NULL;
226 _pcmbp[0]=NULL; 246 _pcmbp[0]=NULL;
227 _pcmbp[1]=NULL; 247 _pcmbp[1]=NULL;
228 248
229#ifdef TREMOR_USE_IRAM
230 if(NULL != (v->iram_double_pcm = iram_malloc(vi->channels*v->pcm_storage*sizeof(ogg_int32_t))))
231 {
232 /* one-time initialisation at codec start or on switch from
233 blocksizes greater than IRAM_PCM_END to sizes that fit */
234 for(i=0;i<vi->channels;i++)
235 v->pcm[i]=&v->iram_double_pcm[i*v->pcm_storage];
236 }
237 else
238#endif
239 {
240 /* one-time initialisation at codec start or on switch from
241 blocksizes that fit in IRAM_PCM_END to those that don't */
242 /* save copy of the pointer so we can free it easily later */
243 pcm_copy[1] = _ogg_calloc(vi->channels*v->pcm_storage,sizeof(*v->pcm[i]));
244 for(i=0;i<vi->channels;i++)
245 v->pcm[i] = pcm_copy[1]+i*v->pcm_storage;
246 }
247
248 /* all 1 (large block) or 0 (small block) */ 249 /* all 1 (large block) or 0 (small block) */
249 /* explicitly set for the sake of clarity */ 250 /* explicitly set for the sake of clarity */
250 v->lW=0; /* previous window size */ 251 v->lW=0; /* previous window size */
@@ -274,35 +275,19 @@ abort_books:
274int vorbis_synthesis_restart(vorbis_dsp_state *v){ 275int vorbis_synthesis_restart(vorbis_dsp_state *v){
275 vorbis_info *vi=v->vi; 276 vorbis_info *vi=v->vi;
276 codec_setup_info *ci; 277 codec_setup_info *ci;
277 int i;
278 278
279 if(!v->backend_state)return -1; 279 if(!v->backend_state)return -1;
280 if(!vi)return -1; 280 if(!vi)return -1;
281 ci=vi->codec_setup; 281 ci=vi->codec_setup;
282 if(!ci)return -1; 282 if(!ci)return -1;
283 283
284 v->centerW=0;
285 v->pcm_current=0; 284 v->pcm_current=0;
286 285
287 v->pcm_returned=-1; 286 v->pcm_returned=-1;
288 v->granulepos=-1; 287 v->granulepos=-1;
289 v->sequence=-1; 288 v->sequence=-1;
290 ((private_state *)(v->backend_state))->sample_count=-1; 289 ((private_state *)(v->backend_state))->sample_count=-1;
291 290
292 /* indicate to synthesis code that buffer pointers no longer valid
293 (if we're using double pcm buffer) and will need to reset them */
294 v->reset_pcmb = true;
295 /* also reset our copy of the double buffer pointers if we have one */
296#ifdef TREMOR_USE_IRAM
297 if(v->iram_double_pcm)
298 {
299 for(i=0;i<vi->channels;i++)
300 v->pcm[i]=&v->iram_double_pcm[i*v->pcm_storage];
301 }
302#else
303 for(i=0;i<vi->channels;i++)
304 v->pcm[i] = pcm_copy[1]+i*v->pcm_storage;
305#endif
306 return(0); 291 return(0);
307} 292}
308 293
@@ -323,11 +308,10 @@ void vorbis_dsp_clear(vorbis_dsp_state *v){
323 if(vi != NULL) 308 if(vi != NULL)
324 { 309 {
325 /* pcm buffer came from oggmalloc rather than iram */ 310 /* pcm buffer came from oggmalloc rather than iram */
326 for(i=0;i<2;i++) 311 for(i=0;i<3;i++)
327 if(pcm_copy[i]) 312 if(malloc_pointers[i]) {
328 { 313 _ogg_free(malloc_pointers[i]);
329 _ogg_free(pcm_copy[i]); 314 malloc_pointers[i] = NULL;
330 pcm_copy[i] = NULL;
331 } 315 }
332 } 316 }
333 317
@@ -359,10 +343,6 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
359 vorbis_info *vi=v->vi; 343 vorbis_info *vi=v->vi;
360 codec_setup_info *ci=(codec_setup_info *)vi->codec_setup; 344 codec_setup_info *ci=(codec_setup_info *)vi->codec_setup;
361 private_state *b=v->backend_state; 345 private_state *b=v->backend_state;
362 int j;
363#ifdef TREMOR_USE_IRAM
364 bool iram_pcm_doublebuffer = (NULL != v->iram_double_pcm);
365#endif
366 346
367 if(v->pcm_current>v->pcm_returned && v->pcm_returned!=-1)return(OV_EINVAL); 347 if(v->pcm_current>v->pcm_returned && v->pcm_returned!=-1)return(OV_EINVAL);
368 348
@@ -380,79 +360,11 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
380 int n=ci->blocksizes[v->W]/2; 360 int n=ci->blocksizes[v->W]/2;
381 int ln=ci->blocksizes[v->lW]/2; 361 int ln=ci->blocksizes[v->lW]/2;
382 362
383 if(LIKELY(vb->pcm)){ /* no pcm to process if vorbis_synthesis_trackonly 363 if(LIKELY(vb->pcmend != 0)){ /* no pcm to process if vorbis_synthesis_trackonly
384 was called on block */ 364 was called on block */
385 int prevCenter; 365 window_overlap_add(ci->blocksizes[v->W], ci->blocksizes[v->lW],
386 int n0=ci->blocksizes[0]/2; 366 ci->blocksizes[0], ci->blocksizes[1], vi->channels,
387 int n1=ci->blocksizes[1]/2; 367 b->window[v->W & v->lW], v);
388
389#ifdef TREMOR_USE_IRAM
390 if(!iram_pcm_doublebuffer)
391 {
392 prevCenter = v->centerW;
393 v->centerW = n1 - v->centerW;
394 }
395 else
396#endif
397 prevCenter = ln;
398
399 /* overlap/add PCM */
400 /* nb nothing to overlap with on first block so don't bother */
401 if(LIKELY(v->pcm_returned!=-1))
402 {
403 for(j=0;j<vi->channels;j++)
404 {
405 ogg_int32_t *pcm=v->pcm[j]+prevCenter;
406 ogg_int32_t *p=vb->pcm[j];
407
408 /* the overlap/add section */
409 if(v->lW == v->W)
410 {
411 /* large/large or small/small */
412 vect_add_right_left(pcm,p,n);
413 v->pcmb[j]=pcm;
414 }
415 else if (!v->W)
416 {
417 /* large/small */
418 vect_add_right_left(pcm + (n1-n0)/2, p, n0);
419 v->pcmb[j]=pcm;
420 }
421 else
422 {
423 /* small/large */
424 p += (n1-n0)/2;
425 vect_add_left_right(p,pcm,n0);
426 v->pcmb[j]=p;
427 }
428 }
429 }
430#ifdef TREMOR_USE_IRAM
431 /* the copy section */
432 if(!iram_pcm_doublebuffer)
433 {
434 for(j=0;j<vi->channels;j++)
435 {
436 /* at best only vb->pcm is in iram, and that's where we do the
437 synthesis, so we copy out the right-hand subframe of last
438 synthesis into (noniram) local buffer so we can still do
439 synth in iram */
440 vect_copy(v->pcm[j]+v->centerW, vb->pcm[j]+n, n);
441 }
442 }
443 else
444#endif
445 {
446 /* just flip the pointers over as we have a double buffer in iram */
447 ogg_int32_t *p;
448 p=v->pcm[0];
449 v->pcm[0]=vb->pcm[0];
450 vb->pcm[0] = p;
451 p=v->pcm[1];
452 v->pcm[1]=vb->pcm[1];
453 vb->pcm[1] = p;
454 }
455
456 /* deal with initial packet state; we do this using the explicit 368 /* deal with initial packet state; we do this using the explicit
457 pcm_returned==-1 flag otherwise we're sensitive to first block 369 pcm_returned==-1 flag otherwise we're sensitive to first block
458 being short or long */ 370 being short or long */
@@ -464,7 +376,6 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
464 v->pcm_returned=0; 376 v->pcm_returned=0;
465 v->pcm_current=(n+ln)/2; 377 v->pcm_current=(n+ln)/2;
466 } 378 }
467
468 } 379 }
469 380
470 /* track the frame number... This is for convenience, but also 381 /* track the frame number... This is for convenience, but also
diff --git a/apps/codecs/libtremor/config-tremor.h b/apps/codecs/libtremor/config-tremor.h
index eba0fe0912..cf2dbe977d 100644
--- a/apps/codecs/libtremor/config-tremor.h
+++ b/apps/codecs/libtremor/config-tremor.h
@@ -40,10 +40,10 @@
40 40
41/* Define CPU of Normal IRAM (96KB) */ 41/* Define CPU of Normal IRAM (96KB) */
42#else 42#else
43/* PCM_BUFFER : 16384 Byte (2048*2*4) * 43/* floor and double residue buffer : 24576 Byte (2048/2*4*2*3) *
44 * WINDOW_LOOKUP : 4608 Byte (128*4 + 1024*4) * 44 * WINDOW_LOOKUP : 4608 Byte (128*4 + 1024*4) *
45 * TOTAL : 20992 */ 45 * TOTAL : 29184 */
46#define IRAM_IBSS_SIZE 20992 46#define IRAM_IBSS_SIZE 29184
47#endif 47#endif
48#endif 48#endif
49 49
diff --git a/apps/codecs/libtremor/ffmpeg_render_line.h b/apps/codecs/libtremor/ffmpeg_render_line.h
index 1b760ae20e..a08952d95d 100644
--- a/apps/codecs/libtremor/ffmpeg_render_line.h
+++ b/apps/codecs/libtremor/ffmpeg_render_line.h
@@ -21,6 +21,9 @@
21 */ 21 */
22 22
23/* render_line and friend taken from ffmpeg (libavcodec/vorbis.c) */ 23/* render_line and friend taken from ffmpeg (libavcodec/vorbis.c) */
24
25#include "misc.h"
26
24static inline void render_line_unrolled(int x, int y, int x1, 27static inline void render_line_unrolled(int x, int y, int x1,
25 int sy, int ady, int adx, 28 int sy, int ady, int adx,
26 const ogg_int32_t *lookup, ogg_int32_t *buf) 29 const ogg_int32_t *lookup, ogg_int32_t *buf)
@@ -45,8 +48,8 @@ static inline void render_line_unrolled(int x, int y, int x1,
45 } 48 }
46} 49}
47 50
48static void render_line(int x0, int y0, int x1, int y1, 51static inline void render_line(int x0, int y0, int x1, int y1,
49 const ogg_int32_t *lookup, ogg_int32_t *buf) 52 const ogg_int32_t *lookup, ogg_int32_t *buf)
50{ 53{
51 int dy = y1 - y0; 54 int dy = y1 - y0;
52 int adx = x1 - x0; 55 int adx = x1 - x0;
@@ -72,3 +75,67 @@ static void render_line(int x0, int y0, int x1, int y1,
72 } 75 }
73 } 76 }
74} 77}
78
79#ifndef INCL_OPTIMIZED_VECTOR_FMUL_WINDOW
80#define INCL_OPTIMIZED_VECTOR_FMUL_WINDOW
81static inline void ff_vector_fmul_window_c(ogg_int32_t *dst, const ogg_int32_t *src0,
82 const ogg_int32_t *src1, const ogg_int32_t *win, int len){
83 int i,j;
84 dst += len;
85 win += len;
86 src0+= len;
87 for(i=-len, j=len-1; i<0; i++, j--) {
88 ogg_int32_t s0 = src0[i];
89 ogg_int32_t s1 = src1[j];
90 ogg_int32_t wi = win[i];
91 ogg_int32_t wj = win[j];
92 XNPROD31(s0, s1, wj, wi, &dst[i], &dst[j]);
93 /*
94 dst[i] = MULT31(s0,wj) - MULT31(s1,wi);
95 dst[j] = MULT31(s0,wi) + MULT31(s1,wj);
96 */
97 }
98}
99#endif
100
101static inline void copy_normalize(ogg_int32_t *dst, ogg_int32_t *src, int len)
102{
103 memcpy(dst, src, len * sizeof(ogg_int32_t));
104}
105
106static inline void window_overlap_add(unsigned int blocksize, unsigned int lastblock,
107 unsigned int bs0, unsigned int bs1, int ch,
108 const ogg_int32_t *win, vorbis_dsp_state *v)
109{
110 unsigned retlen = (blocksize + lastblock) / 4;
111 int j;
112 for (j = 0; j < ch; j++) {
113 ogg_int32_t *residue = v->residues[v->ri] + j * blocksize / 2;
114 ogg_int32_t *saved;
115 saved = v->saved_ptr[j];
116 ogg_int32_t *ret = v->floors + j * retlen;
117 ogg_int32_t *buf = residue;
118
119 if (v->W == v->lW) {
120 ff_vector_fmul_window_c(ret, saved, buf, win, blocksize / 4);
121 } else if (v->W > v->lW) {
122 ff_vector_fmul_window_c(ret, saved, buf, win, bs0 / 4);
123 copy_normalize(ret+bs0/2, buf+bs0/4, (bs1-bs0)/4);
124 } else {
125 copy_normalize(ret, saved, (bs1 - bs0) / 4);
126 ff_vector_fmul_window_c(ret + (bs1 - bs0) / 4, saved + (bs1 - bs0) / 4, buf, win, bs0 / 4);
127 }
128 if (v->residues[1] == NULL) {
129 memcpy(saved, buf + blocksize / 4, blocksize / 4 * sizeof(ogg_int32_t));
130 v->saved_ptr[j] = v->saved + j * bs1 / 4;
131 } else {
132 v->saved_ptr[j] = buf + blocksize / 4;
133 }
134
135 v->pcmb[j] = ret;
136 }
137
138 if (v->residues[1] != NULL) {
139 v->ri ^= 1;
140 }
141}
diff --git a/apps/codecs/libtremor/ivorbiscodec.h b/apps/codecs/libtremor/ivorbiscodec.h
index 23b62c48bd..73ba4aeb2a 100644
--- a/apps/codecs/libtremor/ivorbiscodec.h
+++ b/apps/codecs/libtremor/ivorbiscodec.h
@@ -59,7 +59,13 @@ typedef struct vorbis_info{
59typedef struct vorbis_dsp_state{ 59typedef struct vorbis_dsp_state{
60 vorbis_info *vi; 60 vorbis_info *vi;
61 61
62 ogg_int32_t **pcm; 62 ogg_int32_t *residues[2];
63 ogg_int32_t *floors;
64 ogg_int32_t *saved;
65 ogg_int32_t *saved_ptr[CHANNELS];
66
67 int ri;
68
63 ogg_int32_t **pcmb; 69 ogg_int32_t **pcmb;
64 ogg_int32_t **pcmret; 70 ogg_int32_t **pcmret;
65 int pcm_storage; 71 int pcm_storage;
@@ -71,23 +77,15 @@ typedef struct vorbis_dsp_state{
71 long lW; 77 long lW;
72 long W; 78 long W;
73 long nW; 79 long nW;
74 long centerW;
75 80
76 ogg_int64_t granulepos; 81 ogg_int64_t granulepos;
77 ogg_int64_t sequence; 82 ogg_int64_t sequence;
78 83
79 void *backend_state; 84 void *backend_state;
80
81 ogg_int32_t *first_pcm; /* PCM buffer (for normal RAM or IRAM)*/
82#ifdef TREMOR_USE_IRAM
83 ogg_int32_t *iram_double_pcm; /* PCM 2nd buffer for IRAM */
84#endif
85 bool reset_pcmb;
86} vorbis_dsp_state; 85} vorbis_dsp_state;
87 86
88typedef struct vorbis_block{ 87typedef struct vorbis_block{
89 /* necessary stream state for linking to the framing abstraction */ 88 /* necessary stream state for linking to the framing abstraction */
90 ogg_int32_t **pcm; /* this is a pointer into local storage */
91 oggpack_buffer opb; 89 oggpack_buffer opb;
92 90
93 long lW; 91 long lW;
diff --git a/apps/codecs/libtremor/mapping0.c b/apps/codecs/libtremor/mapping0.c
index 9042b9c174..084d5e076d 100644
--- a/apps/codecs/libtremor/mapping0.c
+++ b/apps/codecs/libtremor/mapping0.c
@@ -302,7 +302,6 @@ static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){
302 vorbis_dsp_state *vd=vb->vd; 302 vorbis_dsp_state *vd=vb->vd;
303 vorbis_info *vi=vd->vi; 303 vorbis_info *vi=vd->vi;
304 codec_setup_info *ci=(codec_setup_info *)vi->codec_setup; 304 codec_setup_info *ci=(codec_setup_info *)vi->codec_setup;
305 private_state *b=(private_state *)vd->backend_state;
306 vorbis_look_mapping0 *look=(vorbis_look_mapping0 *)l; 305 vorbis_look_mapping0 *look=(vorbis_look_mapping0 *)l;
307 vorbis_info_mapping0 *info=look->map; 306 vorbis_info_mapping0 *info=look->map;
308 307
@@ -329,8 +328,8 @@ static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){
329 if(floormemo[i]) 328 if(floormemo[i])
330 nonzero[i]=1; 329 nonzero[i]=1;
331 else 330 else
332 nonzero[i]=0; 331 nonzero[i]=0;
333 memset(vb->pcm[i],0,sizeof(*vb->pcm[i])*n/2); 332 memset(vd->floors + i * ci->blocksizes[vb->W]/2,0,sizeof(ogg_int32_t)*n/2);
334 } 333 }
335 334
336 /* channel coupling can 'dirty' the nonzero listing */ 335 /* channel coupling can 'dirty' the nonzero listing */
@@ -351,7 +350,7 @@ static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){
351 zerobundle[ch_in_bundle]=1; 350 zerobundle[ch_in_bundle]=1;
352 else 351 else
353 zerobundle[ch_in_bundle]=0; 352 zerobundle[ch_in_bundle]=0;
354 pcmbundle[ch_in_bundle++]=vb->pcm[j]; 353 pcmbundle[ch_in_bundle++] = vd->floors + j * ci->blocksizes[vb->W]/2;
355 } 354 }
356 } 355 }
357 356
@@ -365,8 +364,8 @@ static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){
365 364
366 /* channel coupling */ 365 /* channel coupling */
367 for(i=info->coupling_steps-1;i>=0;i--){ 366 for(i=info->coupling_steps-1;i>=0;i--){
368 ogg_int32_t *pcmM=vb->pcm[info->coupling_mag[i]]; 367 ogg_int32_t *pcmM = vd->floors + info->coupling_mag[i] * ci->blocksizes[vb->W]/2;
369 ogg_int32_t *pcmA=vb->pcm[info->coupling_ang[i]]; 368 ogg_int32_t *pcmA = vd->floors + info->coupling_ang[i] * ci->blocksizes[vb->W]/2;
370 channel_couple(pcmM,pcmA,n); 369 channel_couple(pcmM,pcmA,n);
371 } 370 }
372 371
@@ -378,24 +377,21 @@ static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){
378 377
379 /* transform the PCM data; takes PCM vector, vb; modifies PCM vector */ 378 /* transform the PCM data; takes PCM vector, vb; modifies PCM vector */
380 /* only MDCT right now.... */ 379 /* only MDCT right now.... */
381
382 for(i=0;i<vi->channels;i++){ 380 for(i=0;i<vi->channels;i++){
383 ogg_int32_t *pcm=vb->pcm[i]; 381 ogg_int32_t *pcm = vd->floors + i*ci->blocksizes[vb->W]/2;
384 int submap=info->chmuxlist[i]; 382 int submap=info->chmuxlist[i];
385 383
386 if(nonzero[i]) { 384 if(nonzero[i]) {
387 /* compute and apply spectral envelope */ 385 /* compute and apply spectral envelope */
388 look->floor_func[submap]-> 386 look->floor_func[submap]->
389 inverse2(vb,look->floor_look[submap],floormemo[i],pcm); 387 inverse2(vb,look->floor_look[submap],floormemo[i],pcm);
390 388
391 ff_imdct_calc(ci->blocksizes_nbits[vb->W], 389 ff_imdct_half(ci->blocksizes_nbits[vb->W],
392 (int32_t*)pcm, 390 (int32_t*)vd->residues[vd->ri] + i*ci->blocksizes[vb->W]/2,
393 (int32_t*)pcm); 391 (int32_t*)&vd->floors[i*ci->blocksizes[vb->W]/2]);
394 /* window the data */
395 _vorbis_apply_window(pcm,b->window,ci->blocksizes,vb->lW,vb->W,vb->nW);
396 } 392 }
397 else 393 else
398 memset(pcm, 0, sizeof(ogg_int32_t)*n); 394 memset(vd->residues[vd->ri] + i*ci->blocksizes[vb->W]/2, 0, sizeof(ogg_int32_t)*n/2);
399 } 395 }
400 396
401 //for(j=0;j<vi->channels;j++) 397 //for(j=0;j<vi->channels;j++)
diff --git a/apps/codecs/libtremor/misc.h b/apps/codecs/libtremor/misc.h
index 0b0ff4d3a7..592a60ffd8 100644
--- a/apps/codecs/libtremor/misc.h
+++ b/apps/codecs/libtremor/misc.h
@@ -26,7 +26,6 @@
26 26
27#include "asm_arm.h" 27#include "asm_arm.h"
28#include "asm_mcf5249.h" 28#include "asm_mcf5249.h"
29
30 29
31/* Some prototypes that were not defined elsewhere */ 30/* Some prototypes that were not defined elsewhere */
32void *_vorbis_block_alloc(vorbis_block *vb,long bytes); 31void *_vorbis_block_alloc(vorbis_block *vb,long bytes);
@@ -155,32 +154,6 @@ static inline void XNPROD31(ogg_int32_t a, ogg_int32_t b,
155} 154}
156*/ 155*/
157#endif 156#endif
158#ifndef _TREMOR_VECT_OPS
159#define _TREMOR_VECT_OPS
160static inline
161void vect_add_left_right(ogg_int32_t *x, const ogg_int32_t *y, int n)
162{
163 vect_add(x, y, n );
164}
165
166static inline
167void vect_add_right_left(ogg_int32_t *x, const ogg_int32_t *y, int n)
168{
169 vect_add(x, y, n );
170}
171
172static inline
173void ogg_vect_mult_fw(int32_t *data, const int32_t *window, int n)
174{
175 vect_mult_fw(data, window, n);
176}
177
178static inline
179void ogg_vect_mult_bw(int32_t *data, const int32_t *window, int n)
180{
181 vect_mult_bw(data, window, n);
182}
183#endif
184 157
185#if 0 158#if 0
186#ifndef _V_VECT_OPS 159#ifndef _V_VECT_OPS
diff --git a/apps/codecs/libtremor/synthesis.c b/apps/codecs/libtremor/synthesis.c
index 657aa74e11..d1ef99ae37 100644
--- a/apps/codecs/libtremor/synthesis.c
+++ b/apps/codecs/libtremor/synthesis.c
@@ -25,15 +25,13 @@
25#include "os.h" 25#include "os.h"
26 26
27 27
28static ogg_int32_t *ipcm_vect[CHANNELS] IBSS_ATTR;
29
30static inline int _vorbis_synthesis1(vorbis_block *vb,ogg_packet *op,int decodep){ 28static inline int _vorbis_synthesis1(vorbis_block *vb,ogg_packet *op,int decodep){
31 vorbis_dsp_state *vd= vb ? vb->vd : 0; 29 vorbis_dsp_state *vd= vb ? vb->vd : 0;
32 private_state *b= vd ? (private_state *)vd->backend_state: 0; 30 private_state *b= vd ? (private_state *)vd->backend_state: 0;
33 vorbis_info *vi= vd ? vd->vi : 0; 31 vorbis_info *vi= vd ? vd->vi : 0;
34 codec_setup_info *ci= vi ? (codec_setup_info *)vi->codec_setup : 0; 32 codec_setup_info *ci= vi ? (codec_setup_info *)vi->codec_setup : 0;
35 oggpack_buffer *opb=vb ? &vb->opb : 0; 33 oggpack_buffer *opb=vb ? &vb->opb : 0;
36 int type,mode,i; 34 int type,mode;
37 35
38 if (!vd || !b || !vi || !ci || !opb) { 36 if (!vd || !b || !vi || !ci || !opb) {
39 return OV_EBADPACKET; 37 return OV_EBADPACKET;
@@ -75,22 +73,8 @@ static inline int _vorbis_synthesis1(vorbis_block *vb,ogg_packet *op,int decodep
75 73
76 if(decodep && vi->channels<=CHANNELS) 74 if(decodep && vi->channels<=CHANNELS)
77 { 75 {
78 vb->pcm = ipcm_vect;
79
80 /* set pcm end point */ 76 /* set pcm end point */
81 vb->pcmend=ci->blocksizes[vb->W]; 77 vb->pcmend=ci->blocksizes[vb->W];
82 /* use statically allocated buffer */
83 if(vd->reset_pcmb || vb->pcm[0]==NULL)
84 {
85 /* one-time initialisation at codec start
86 NOT for every block synthesis start
87 allows us to flip between buffers once initialised
88 by simply flipping pointers */
89 for(i=0; i<vi->channels; i++)
90 vb->pcm[i] = &vd->first_pcm[i*ci->blocksizes[1]];
91
92 }
93 vd->reset_pcmb = false;
94 78
95 /* unpack_header enforces range checking */ 79 /* unpack_header enforces range checking */
96 type=ci->map_type[ci->mode_param[mode]->mapping]; 80 type=ci->map_type[ci->mode_param[mode]->mapping];
@@ -98,8 +82,6 @@ static inline int _vorbis_synthesis1(vorbis_block *vb,ogg_packet *op,int decodep
98 }else{ 82 }else{
99 /* no pcm */ 83 /* no pcm */
100 vb->pcmend=0; 84 vb->pcmend=0;
101 vb->pcm=NULL;
102
103 return(0); 85 return(0);
104 } 86 }
105} 87}
diff --git a/apps/codecs/libtremor/window.c b/apps/codecs/libtremor/window.c
index e46008aef0..3bc947f0e5 100644
--- a/apps/codecs/libtremor/window.c
+++ b/apps/codecs/libtremor/window.c
@@ -53,7 +53,7 @@ const void *_vorbis_window(int type, int left){
53 return(0); 53 return(0);
54 } 54 }
55} 55}
56 56#if 0
57void _vorbis_apply_window(ogg_int32_t *d,const void *window_p[2], 57void _vorbis_apply_window(ogg_int32_t *d,const void *window_p[2],
58 long *blocksizes, 58 long *blocksizes,
59 int lW,int W,int nW){ 59 int lW,int W,int nW){
@@ -79,4 +79,4 @@ void _vorbis_apply_window(ogg_int32_t *d,const void *window_p[2],
79 /* Again - memset not needed 79 /* Again - memset not needed
80 memset((void *)&d[rightend], 0, sizeof(ogg_int32_t)*(n-rightend)); */ 80 memset((void *)&d[rightend], 0, sizeof(ogg_int32_t)*(n-rightend)); */
81} 81}
82 82#endif
diff --git a/apps/codecs/libtremor/window.h b/apps/codecs/libtremor/window.h
index b242ec2354..166d0b8e9c 100644
--- a/apps/codecs/libtremor/window.h
+++ b/apps/codecs/libtremor/window.h
@@ -19,9 +19,9 @@
19#define _V_WINDOW_ 19#define _V_WINDOW_
20 20
21extern const void *_vorbis_window(int type,int left); 21extern const void *_vorbis_window(int type,int left);
22extern void _vorbis_apply_window(ogg_int32_t *d,const void *window[2], 22/*extern void _vorbis_apply_window(ogg_int32_t *d,const void *window[2],
23 long *blocksizes, 23 long *blocksizes,
24 int lW,int W,int nW); 24 int lW,int W,int nW);*/
25 25
26 26
27#endif 27#endif