summary | refs | log | tree | commit | diff
path: root/apps/codecs/libtremor/block.c
diff options
context:
space:
mode:
Diffstat (limited to 'apps/codecs/libtremor/block.c')
-rw-r--r-- apps/codecs/libtremor/block.c 222
1 files changed, 143 insertions, 79 deletions
diff --git a/apps/codecs/libtremor/block.c b/apps/codecs/libtremor/block.c
index e609fc44f7..eb087e12a9 100644
--- a/apps/codecs/libtremor/block.c
+++ b/apps/codecs/libtremor/block.c
@@ -36,6 +36,13 @@ static int ilog(unsigned int v){
36 return(ret); 36 return(ret);
37} 37}
38 38
39static ogg_int32_t* _pcmp [CHANNELS] IBSS_ATTR;
40static ogg_int32_t* _pcmbp[CHANNELS] IBSS_ATTR;
41static ogg_int32_t* _pcmret[CHANNELS] IBSS_ATTR;
42/* if true, we have both pcm buffers in iram and we use a bufferflip.
43 if false, we have one in iram and one in mem, and we use a memcpy */
44static bool iram_pcm_doublebuffer IBSS_ATTR;
45
39/* pcm accumulator examples (not exhaustive): 46/* pcm accumulator examples (not exhaustive):
40 47
41 <-------------- lW ----------------> 48 <-------------- lW ---------------->
@@ -145,18 +152,44 @@ int vorbis_block_clear(vorbis_block *vb){
145 152
146static int _vds_init(vorbis_dsp_state *v,vorbis_info *vi){ 153static int _vds_init(vorbis_dsp_state *v,vorbis_info *vi){
147 int i; 154 int i;
155 long b_size[2];
156 LOOKUP_TNC *iramposw;
157 ogg_int32_t *internal_pcm=NULL;
158
148 codec_setup_info *ci=(codec_setup_info *)vi->codec_setup; 159 codec_setup_info *ci=(codec_setup_info *)vi->codec_setup;
149 private_state *b=NULL; 160 private_state *b=NULL;
150 161
151 memset(v,0,sizeof(*v)); 162 memset(v,0,sizeof(*v));
163 v->reset_pcmb=true;
152 b=(private_state *)(v->backend_state=_ogg_calloc(1,sizeof(*b))); 164 b=(private_state *)(v->backend_state=_ogg_calloc(1,sizeof(*b)));
153 165
154 v->vi=vi; 166 v->vi=vi;
155 b->modebits=ilog(ci->modes); 167 b->modebits=ilog(ci->modes);
156 168
169 /* allocate IRAM buffer for the PCM data generated by synthesis */
170 iram_malloc_init();
171 v->iram_pcm=(ogg_int32_t *)iram_malloc(vi->channels*ci->blocksizes[1]*sizeof(ogg_int32_t));
172 if(v->iram_pcm != NULL) v->iram_pcm_storage=ci->blocksizes[1];
173 else v->iram_pcm_storage=0;
174
175 v->centerW=0;
176
157 /* Vorbis I uses only window type 0 */ 177 /* Vorbis I uses only window type 0 */
158 b->window[0]=_vorbis_window(0,ci->blocksizes[0]/2); 178 b_size[0]=ci->blocksizes[0]/2;
159 b->window[1]=_vorbis_window(0,ci->blocksizes[1]/2); 179 b_size[1]=ci->blocksizes[1]/2;
180 b->window[0]=_vorbis_window(0,b_size[0]);
181 b->window[1]=_vorbis_window(0,b_size[1]);
182
183 /* allocate IRAM buffer for window tables too, if sufficient iram available */
184 /* give preference to the larger window over the smaller window
185 (on the assumption that both windows are equally likely used) */
186 for(i=1; i>=0; i--){
187 iramposw=(LOOKUP_TNC *)iram_malloc(b_size[i]*sizeof(LOOKUP_TNC));
188 if(iramposw!=NULL) {
189 memcpy(iramposw, b->window[i], b_size[i]*sizeof(LOOKUP_TNC));
190 b->window[i]=iramposw;
191 }
192 }
160 193
161 /* finish the codebooks */ 194 /* finish the codebooks */
162 if(!ci->fullbooks){ 195 if(!ci->fullbooks){
@@ -169,14 +202,34 @@ static int _vds_init(vorbis_dsp_state *v,vorbis_info *vi){
169 } 202 }
170 } 203 }
171 204
205 /* if we can get away with it, put a double buffer into IRAM too, so that
206 overlap-add runs iram-to-iram and we avoid needing to memcpy */
172 v->pcm_storage=ci->blocksizes[1]; 207 v->pcm_storage=ci->blocksizes[1];
173 v->pcm=(ogg_int32_t **)_ogg_malloc(vi->channels*sizeof(*v->pcm)); 208 v->pcm=_pcmp;
174 v->pcmb=(ogg_int32_t **)_ogg_malloc(vi->channels*sizeof(*v->pcmb)); 209 v->pcmret=_pcmret;
175 v->pcmret=(ogg_int32_t **)_ogg_malloc(vi->channels*sizeof(*v->pcmret)); 210 v->pcmb=_pcmbp;
176 211
177 for(i=0;i<vi->channels;i++) 212 _pcmp[0]=NULL;
178 v->pcm[i]=(ogg_int32_t *)_ogg_calloc(v->pcm_storage,sizeof(*v->pcm[i])); 213 _pcmp[1]=NULL;
179 214 _pcmbp[0]=NULL;
215 _pcmbp[1]=NULL;
216
217 if(NULL != (internal_pcm = iram_malloc(vi->channels*v->pcm_storage*sizeof(ogg_int32_t))))
218 {
219 /* one-time initialisation at codec start or on switch from
220 blocksizes greater than IRAM_PCM_END to sizes that fit */
221 for(i=0;i<vi->channels;i++)
222 v->pcm[i]=&internal_pcm[i*v->pcm_storage];
223 iram_pcm_doublebuffer = true;
224 }
225 else
226 {
227 /* one-time initialisation at codec start or on switch from
228 blocksizes that fit in IRAM_PCM_END to those that don't */
229 for(i=0;i<vi->channels;i++)
230 v->pcm[i]=(ogg_int32_t *)_ogg_calloc(v->pcm_storage,sizeof(*v->pcm[i]));
231 iram_pcm_doublebuffer = false;
232 }
180 233
181 /* all 1 (large block) or 0 (small block) */ 234 /* all 1 (large block) or 0 (small block) */
182 /* explicitly set for the sake of clarity */ 235 /* explicitly set for the sake of clarity */
@@ -203,13 +256,17 @@ int vorbis_synthesis_restart(vorbis_dsp_state *v){
203 ci=vi->codec_setup; 256 ci=vi->codec_setup;
204 if(!ci)return -1; 257 if(!ci)return -1;
205 258
206 v->centerW=ci->blocksizes[1]/2; 259 v->centerW=0;
207 v->pcm_current=v->centerW; 260 v->pcm_current=0;
208 261
209 v->pcm_returned=-1; 262 v->pcm_returned=-1;
210 v->granulepos=-1; 263 v->granulepos=-1;
211 v->sequence=-1; 264 v->sequence=-1;
212 ((private_state *)(v->backend_state))->sample_count=-1; 265 ((private_state *)(v->backend_state))->sample_count=-1;
266
267 /* indicate to synthesis code that buffer pointers no longer valid
268 (if we're using double pcm buffer) and will need to reset them */
269 v->reset_pcmb = true;
213 270
214 return(0); 271 return(0);
215} 272}
@@ -228,11 +285,12 @@ void vorbis_dsp_clear(vorbis_dsp_state *v){
228 codec_setup_info *ci=(codec_setup_info *)(vi?vi->codec_setup:NULL); 285 codec_setup_info *ci=(codec_setup_info *)(vi?vi->codec_setup:NULL);
229 private_state *b=(private_state *)v->backend_state; 286 private_state *b=(private_state *)v->backend_state;
230 287
231 if(v->pcm){ 288 if(!iram_pcm_doublebuffer)
232 for(i=0;i<vi->channels;i++) 289 {
233 if(v->pcm[i])_ogg_free(v->pcm[i]); 290 if(v->pcm){
234 _ogg_free(v->pcm); 291 for(i=0;i<vi->channels;i++)
235 if(v->pcmret)_ogg_free(v->pcmret); 292 if(v->pcm[i])_ogg_free(v->pcm[i]);
293 }
236 } 294 }
237 295
238 /* free mode lookups; these are actually vorbis_look_mapping structs */ 296 /* free mode lookups; these are actually vorbis_look_mapping structs */
@@ -258,7 +316,7 @@ void vorbis_dsp_clear(vorbis_dsp_state *v){
258 calling (as it relies on the previous block). */ 316 calling (as it relies on the previous block). */
259 317
260int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb) 318int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb)
261 ICODE_ATTR_TREMOR_NOT_MDCT; 319 ICODE_ATTR;
262int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){ 320int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
263 vorbis_info *vi=v->vi; 321 vorbis_info *vi=v->vi;
264 codec_setup_info *ci=(codec_setup_info *)vi->codec_setup; 322 codec_setup_info *ci=(codec_setup_info *)vi->codec_setup;
@@ -278,85 +336,91 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
278 } 336 }
279 337
280 v->sequence=vb->sequence; 338 v->sequence=vb->sequence;
339 int n=ci->blocksizes[v->W]/2;
340 int ln=ci->blocksizes[v->lW]/2;
281 341
282 if(vb->pcm){ /* no pcm to process if vorbis_synthesis_trackonly 342 if(LIKELY(vb->pcm)){ /* no pcm to process if vorbis_synthesis_trackonly
283 was called on block */ 343 was called on block */
284 int n=ci->blocksizes[v->W]/2; 344 int prevCenter;
285 int n0=ci->blocksizes[0]/2; 345 int n0=ci->blocksizes[0]/2;
286 int n1=ci->blocksizes[1]/2; 346 int n1=ci->blocksizes[1]/2;
287 347
288 int thisCenter; 348 if(iram_pcm_doublebuffer)
289 int prevCenter; 349 {
290 350 prevCenter = ln;
291 if(v->centerW){ 351 }
292 thisCenter=n1; 352 else
293 prevCenter=0; 353 {
294 }else{ 354 prevCenter = v->centerW;
295 thisCenter=0; 355 v->centerW = n1 - v->centerW;
296 prevCenter=n1;
297 } 356 }
298
299 /* v->pcm is now used like a two-stage double buffer. We don't want
300 to have to constantly shift *or* adjust memory usage. Don't
301 accept a new block until the old is shifted out */
302 357
303 /* overlap/add PCM */ 358 /* overlap/add PCM */
304 359 /* nb nothing to overlap with on first block so don't bother */
305 for(j=0;j<vi->channels;j++){ 360 if(LIKELY(v->pcm_returned!=-1))
306 /* the overlap/add section */ 361 {
307 if(v->lW){ 362 for(j=0;j<vi->channels;j++)
308 if(v->W){ 363 {
309 /* large/large */ 364 ogg_int32_t *pcm=v->pcm[j]+prevCenter;
310 ogg_int32_t *pcm=v->pcm[j]+prevCenter; 365 ogg_int32_t *p=vb->pcm[j];
311 ogg_int32_t *p=vb->pcm[j]; 366
312 vect_add(p, pcm, n1); 367 /* the overlap/add section */
313 v->pcmb[j]=p; 368 if(v->lW == v->W)
314 }else{ 369 {
315 /* large/small */ 370 /* large/large or small/small */
316 ogg_int32_t *pcm=v->pcm[j]+prevCenter+n1/2-n0/2; 371 vect_add_right_left(pcm,p,n);
317 ogg_int32_t *p=vb->pcm[j]; 372 v->pcmb[j]=pcm;
318 vect_add(pcm, p, n0);
319 v->pcmb[j]=v->pcm[j]+prevCenter;
320 } 373 }
321 }else{ 374 else if (!v->W)
322 if(v->W){ 375 {
323 /* small/large */ 376 /* large/small */
324 ogg_int32_t *pcm=v->pcm[j]+prevCenter; 377 vect_add_right_left(pcm + (n1-n0)/2, p, n0);
325 ogg_int32_t *p=vb->pcm[j]+n1/2-n0/2; 378 v->pcmb[j]=pcm;
326 vect_add(p, pcm, n0); 379 }
327 v->pcmb[j]=p; 380 else
328 }else{ 381 {
329 /* small/small */ 382 /* small/large */
330 ogg_int32_t *pcm=v->pcm[j]+prevCenter; 383 p += (n1-n0)/2;
331 ogg_int32_t *p=vb->pcm[j]; 384 vect_add_left_right(p,pcm,n0);
332 vect_add(p, pcm, n0); 385 v->pcmb[j]=p;
333 v->pcmb[j]=p;
334 } 386 }
335 } 387 }
336 388 }
337 /* the copy section */ 389
390 /* the copy section */
391 if(iram_pcm_doublebuffer)
392 {
393 /* just flip the pointers over as we have a double buffer in iram */
394 ogg_int32_t *p;
395 p=v->pcm[0];
396 v->pcm[0]=vb->pcm[0];
397 vb->pcm[0] = p;
398 p=v->pcm[1];
399 v->pcm[1]=vb->pcm[1];
400 vb->pcm[1] = p;
401 }
402 else
403 {
404 for(j=0;j<vi->channels;j++)
338 { 405 {
339 ogg_int32_t *pcm=v->pcm[j]+thisCenter; 406 /* at best only vb->pcm is in iram, and that's where we do the
340 ogg_int32_t *p=vb->pcm[j]+n; 407 synthesis, so we copy out the right-hand subframe of last
341 vect_copy(pcm, p, n); 408 synthesis into (noniram) local buffer so we can still do
409 synth in iram */
410 vect_copy(v->pcm[j]+v->centerW, vb->pcm[j]+n, n);
342 } 411 }
343 } 412 }
344 413
345 if(v->centerW)
346 v->centerW=0;
347 else
348 v->centerW=n1;
349
350 /* deal with initial packet state; we do this using the explicit 414 /* deal with initial packet state; we do this using the explicit
351 pcm_returned==-1 flag otherwise we're sensitive to first block 415 pcm_returned==-1 flag otherwise we're sensitive to first block
352 being short or long */ 416 being short or long */
353 417
354 if(v->pcm_returned==-1){ 418 if(v->pcm_returned==-1){
355 v->pcm_returned=thisCenter; 419 v->pcm_returned=0;
356 v->pcm_current=thisCenter; 420 v->pcm_current=0;
357 }else{ 421 }else{
358 v->pcm_returned=0; 422 v->pcm_returned=0;
359 v->pcm_current=ci->blocksizes[v->lW]/4+ci->blocksizes[v->W]/4; 423 v->pcm_current=(n+ln)/2;
360 } 424 }
361 425
362 } 426 }
@@ -375,7 +439,7 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
375 if(b->sample_count==-1){ 439 if(b->sample_count==-1){
376 b->sample_count=0; 440 b->sample_count=0;
377 }else{ 441 }else{
378 b->sample_count+=ci->blocksizes[v->lW]/4+ci->blocksizes[v->W]/4; 442 b->sample_count+=(n+ln)/2;
379 } 443 }
380 444
381 if(v->granulepos==-1){ 445 if(v->granulepos==-1){
@@ -406,7 +470,7 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
406 470
407 } 471 }
408 }else{ 472 }else{
409 v->granulepos+=ci->blocksizes[v->lW]/4+ci->blocksizes[v->W]/4; 473 v->granulepos+=(n+ln)/2;
410 if(vb->granulepos!=-1 && v->granulepos!=vb->granulepos){ 474 if(vb->granulepos!=-1 && v->granulepos!=vb->granulepos){
411 475
412 if(v->granulepos>vb->granulepos){ 476 if(v->granulepos>vb->granulepos){