diff options
Diffstat (limited to 'apps/codecs/libtremor/block.c')
-rw-r--r-- | apps/codecs/libtremor/block.c | 222 |
1 files changed, 143 insertions, 79 deletions
diff --git a/apps/codecs/libtremor/block.c b/apps/codecs/libtremor/block.c index e609fc44f7..eb087e12a9 100644 --- a/apps/codecs/libtremor/block.c +++ b/apps/codecs/libtremor/block.c | |||
@@ -36,6 +36,13 @@ static int ilog(unsigned int v){ | |||
36 | return(ret); | 36 | return(ret); |
37 | } | 37 | } |
38 | 38 | ||
39 | static ogg_int32_t* _pcmp [CHANNELS] IBSS_ATTR; | ||
40 | static ogg_int32_t* _pcmbp[CHANNELS] IBSS_ATTR; | ||
41 | static ogg_int32_t* _pcmret[CHANNELS] IBSS_ATTR; | ||
42 | /* if true, both pcm buffers are in iram and we flip the buffer pointers. | ||
43 | if false, one buffer is in iram and one is in main memory, and we use a memcpy */ | ||
44 | static bool iram_pcm_doublebuffer IBSS_ATTR; | ||
45 | |||
39 | /* pcm accumulator examples (not exhaustive): | 46 | /* pcm accumulator examples (not exhaustive): |
40 | 47 | ||
41 | <-------------- lW ----------------> | 48 | <-------------- lW ----------------> |
@@ -145,18 +152,44 @@ int vorbis_block_clear(vorbis_block *vb){ | |||
145 | 152 | ||
146 | static int _vds_init(vorbis_dsp_state *v,vorbis_info *vi){ | 153 | static int _vds_init(vorbis_dsp_state *v,vorbis_info *vi){ |
147 | int i; | 154 | int i; |
155 | long b_size[2]; | ||
156 | LOOKUP_TNC *iramposw; | ||
157 | ogg_int32_t *internal_pcm=NULL; | ||
158 | |||
148 | codec_setup_info *ci=(codec_setup_info *)vi->codec_setup; | 159 | codec_setup_info *ci=(codec_setup_info *)vi->codec_setup; |
149 | private_state *b=NULL; | 160 | private_state *b=NULL; |
150 | 161 | ||
151 | memset(v,0,sizeof(*v)); | 162 | memset(v,0,sizeof(*v)); |
163 | v->reset_pcmb=true; | ||
152 | b=(private_state *)(v->backend_state=_ogg_calloc(1,sizeof(*b))); | 164 | b=(private_state *)(v->backend_state=_ogg_calloc(1,sizeof(*b))); |
153 | 165 | ||
154 | v->vi=vi; | 166 | v->vi=vi; |
155 | b->modebits=ilog(ci->modes); | 167 | b->modebits=ilog(ci->modes); |
156 | 168 | ||
169 | /* allocate IRAM buffer for the PCM data generated by synthesis */ | ||
170 | iram_malloc_init(); | ||
171 | v->iram_pcm=(ogg_int32_t *)iram_malloc(vi->channels*ci->blocksizes[1]*sizeof(ogg_int32_t)); | ||
172 | if(v->iram_pcm != NULL) v->iram_pcm_storage=ci->blocksizes[1]; | ||
173 | else v->iram_pcm_storage=0; | ||
174 | |||
175 | v->centerW=0; | ||
176 | |||
157 | /* Vorbis I uses only window type 0 */ | 177 | /* Vorbis I uses only window type 0 */ |
158 | b->window[0]=_vorbis_window(0,ci->blocksizes[0]/2); | 178 | b_size[0]=ci->blocksizes[0]/2; |
159 | b->window[1]=_vorbis_window(0,ci->blocksizes[1]/2); | 179 | b_size[1]=ci->blocksizes[1]/2; |
180 | b->window[0]=_vorbis_window(0,b_size[0]); | ||
181 | b->window[1]=_vorbis_window(0,b_size[1]); | ||
182 | |||
183 | /* allocate IRAM buffers for the window tables too, if sufficient IRAM is available */ | ||
184 | /* give preference to the larger window over the smaller window | ||
185 | (on the assumption that both windows are equally likely to be used) */ | ||
186 | for(i=1; i>=0; i--){ | ||
187 | iramposw=(LOOKUP_TNC *)iram_malloc(b_size[i]*sizeof(LOOKUP_TNC)); | ||
188 | if(iramposw!=NULL) { | ||
189 | memcpy(iramposw, b->window[i], b_size[i]*sizeof(LOOKUP_TNC)); | ||
190 | b->window[i]=iramposw; | ||
191 | } | ||
192 | } | ||
160 | 193 | ||
161 | /* finish the codebooks */ | 194 | /* finish the codebooks */ |
162 | if(!ci->fullbooks){ | 195 | if(!ci->fullbooks){ |
@@ -169,14 +202,34 @@ static int _vds_init(vorbis_dsp_state *v,vorbis_info *vi){ | |||
169 | } | 202 | } |
170 | } | 203 | } |
171 | 204 | ||
205 | /* if we can get away with it, put a double buffer into IRAM too, so that | ||
206 | overlap-add runs iram-to-iram and we avoid needing to memcpy */ | ||
172 | v->pcm_storage=ci->blocksizes[1]; | 207 | v->pcm_storage=ci->blocksizes[1]; |
173 | v->pcm=(ogg_int32_t **)_ogg_malloc(vi->channels*sizeof(*v->pcm)); | 208 | v->pcm=_pcmp; |
174 | v->pcmb=(ogg_int32_t **)_ogg_malloc(vi->channels*sizeof(*v->pcmb)); | 209 | v->pcmret=_pcmret; |
175 | v->pcmret=(ogg_int32_t **)_ogg_malloc(vi->channels*sizeof(*v->pcmret)); | 210 | v->pcmb=_pcmbp; |
176 | 211 | ||
177 | for(i=0;i<vi->channels;i++) | 212 | _pcmp[0]=NULL; |
178 | v->pcm[i]=(ogg_int32_t *)_ogg_calloc(v->pcm_storage,sizeof(*v->pcm[i])); | 213 | _pcmp[1]=NULL; |
179 | 214 | _pcmbp[0]=NULL; | |
215 | _pcmbp[1]=NULL; | ||
216 | |||
217 | if(NULL != (internal_pcm = iram_malloc(vi->channels*v->pcm_storage*sizeof(ogg_int32_t)))) | ||
218 | { | ||
219 | /* one-time initialisation at codec start or on switch from | ||
220 | blocksizes greater than IRAM_PCM_END to sizes that fit */ | ||
221 | for(i=0;i<vi->channels;i++) | ||
222 | v->pcm[i]=&internal_pcm[i*v->pcm_storage]; | ||
223 | iram_pcm_doublebuffer = true; | ||
224 | } | ||
225 | else | ||
226 | { | ||
227 | /* one-time initialisation at codec start or on switch from | ||
228 | blocksizes that fit in IRAM_PCM_END to those that don't */ | ||
229 | for(i=0;i<vi->channels;i++) | ||
230 | v->pcm[i]=(ogg_int32_t *)_ogg_calloc(v->pcm_storage,sizeof(*v->pcm[i])); | ||
231 | iram_pcm_doublebuffer = false; | ||
232 | } | ||
180 | 233 | ||
181 | /* all 1 (large block) or 0 (small block) */ | 234 | /* all 1 (large block) or 0 (small block) */ |
182 | /* explicitly set for the sake of clarity */ | 235 | /* explicitly set for the sake of clarity */ |
@@ -203,13 +256,17 @@ int vorbis_synthesis_restart(vorbis_dsp_state *v){ | |||
203 | ci=vi->codec_setup; | 256 | ci=vi->codec_setup; |
204 | if(!ci)return -1; | 257 | if(!ci)return -1; |
205 | 258 | ||
206 | v->centerW=ci->blocksizes[1]/2; | 259 | v->centerW=0; |
207 | v->pcm_current=v->centerW; | 260 | v->pcm_current=0; |
208 | 261 | ||
209 | v->pcm_returned=-1; | 262 | v->pcm_returned=-1; |
210 | v->granulepos=-1; | 263 | v->granulepos=-1; |
211 | v->sequence=-1; | 264 | v->sequence=-1; |
212 | ((private_state *)(v->backend_state))->sample_count=-1; | 265 | ((private_state *)(v->backend_state))->sample_count=-1; |
266 | |||
267 | /* indicate to the synthesis code that the buffer pointers are no longer valid | ||
268 | (if we're using the double pcm buffer) and will need to be reset */ | ||
269 | v->reset_pcmb = true; | ||
213 | 270 | ||
214 | return(0); | 271 | return(0); |
215 | } | 272 | } |
@@ -228,11 +285,12 @@ void vorbis_dsp_clear(vorbis_dsp_state *v){ | |||
228 | codec_setup_info *ci=(codec_setup_info *)(vi?vi->codec_setup:NULL); | 285 | codec_setup_info *ci=(codec_setup_info *)(vi?vi->codec_setup:NULL); |
229 | private_state *b=(private_state *)v->backend_state; | 286 | private_state *b=(private_state *)v->backend_state; |
230 | 287 | ||
231 | if(v->pcm){ | 288 | if(!iram_pcm_doublebuffer) |
232 | for(i=0;i<vi->channels;i++) | 289 | { |
233 | if(v->pcm[i])_ogg_free(v->pcm[i]); | 290 | if(v->pcm){ |
234 | _ogg_free(v->pcm); | 291 | for(i=0;i<vi->channels;i++) |
235 | if(v->pcmret)_ogg_free(v->pcmret); | 292 | if(v->pcm[i])_ogg_free(v->pcm[i]); |
293 | } | ||
236 | } | 294 | } |
237 | 295 | ||
238 | /* free mode lookups; these are actually vorbis_look_mapping structs */ | 296 | /* free mode lookups; these are actually vorbis_look_mapping structs */ |
@@ -258,7 +316,7 @@ void vorbis_dsp_clear(vorbis_dsp_state *v){ | |||
258 | calling (as it relies on the previous block). */ | 316 | calling (as it relies on the previous block). */ |
259 | 317 | ||
260 | int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb) | 318 | int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb) |
261 | ICODE_ATTR_TREMOR_NOT_MDCT; | 319 | ICODE_ATTR; |
262 | int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){ | 320 | int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){ |
263 | vorbis_info *vi=v->vi; | 321 | vorbis_info *vi=v->vi; |
264 | codec_setup_info *ci=(codec_setup_info *)vi->codec_setup; | 322 | codec_setup_info *ci=(codec_setup_info *)vi->codec_setup; |
@@ -278,85 +336,91 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){ | |||
278 | } | 336 | } |
279 | 337 | ||
280 | v->sequence=vb->sequence; | 338 | v->sequence=vb->sequence; |
339 | int n=ci->blocksizes[v->W]/2; | ||
340 | int ln=ci->blocksizes[v->lW]/2; | ||
281 | 341 | ||
282 | if(vb->pcm){ /* no pcm to process if vorbis_synthesis_trackonly | 342 | if(LIKELY(vb->pcm)){ /* no pcm to process if vorbis_synthesis_trackonly |
283 | was called on block */ | 343 | was called on block */ |
284 | int n=ci->blocksizes[v->W]/2; | 344 | int prevCenter; |
285 | int n0=ci->blocksizes[0]/2; | 345 | int n0=ci->blocksizes[0]/2; |
286 | int n1=ci->blocksizes[1]/2; | 346 | int n1=ci->blocksizes[1]/2; |
287 | 347 | ||
288 | int thisCenter; | 348 | if(iram_pcm_doublebuffer) |
289 | int prevCenter; | 349 | { |
290 | 350 | prevCenter = ln; | |
291 | if(v->centerW){ | 351 | } |
292 | thisCenter=n1; | 352 | else |
293 | prevCenter=0; | 353 | { |
294 | }else{ | 354 | prevCenter = v->centerW; |
295 | thisCenter=0; | 355 | v->centerW = n1 - v->centerW; |
296 | prevCenter=n1; | ||
297 | } | 356 | } |
298 | |||
299 | /* v->pcm is now used like a two-stage double buffer. We don't want | ||
300 | to have to constantly shift *or* adjust memory usage. Don't | ||
301 | accept a new block until the old is shifted out */ | ||
302 | 357 | ||
303 | /* overlap/add PCM */ | 358 | /* overlap/add PCM */ |
304 | 359 | /* nb nothing to overlap with on first block so don't bother */ | |
305 | for(j=0;j<vi->channels;j++){ | 360 | if(LIKELY(v->pcm_returned!=-1)) |
306 | /* the overlap/add section */ | 361 | { |
307 | if(v->lW){ | 362 | for(j=0;j<vi->channels;j++) |
308 | if(v->W){ | 363 | { |
309 | /* large/large */ | 364 | ogg_int32_t *pcm=v->pcm[j]+prevCenter; |
310 | ogg_int32_t *pcm=v->pcm[j]+prevCenter; | 365 | ogg_int32_t *p=vb->pcm[j]; |
311 | ogg_int32_t *p=vb->pcm[j]; | 366 | |
312 | vect_add(p, pcm, n1); | 367 | /* the overlap/add section */ |
313 | v->pcmb[j]=p; | 368 | if(v->lW == v->W) |
314 | }else{ | 369 | { |
315 | /* large/small */ | 370 | /* large/large or small/small */ |
316 | ogg_int32_t *pcm=v->pcm[j]+prevCenter+n1/2-n0/2; | 371 | vect_add_right_left(pcm,p,n); |
317 | ogg_int32_t *p=vb->pcm[j]; | 372 | v->pcmb[j]=pcm; |
318 | vect_add(pcm, p, n0); | ||
319 | v->pcmb[j]=v->pcm[j]+prevCenter; | ||
320 | } | 373 | } |
321 | }else{ | 374 | else if (!v->W) |
322 | if(v->W){ | 375 | { |
323 | /* small/large */ | 376 | /* large/small */ |
324 | ogg_int32_t *pcm=v->pcm[j]+prevCenter; | 377 | vect_add_right_left(pcm + (n1-n0)/2, p, n0); |
325 | ogg_int32_t *p=vb->pcm[j]+n1/2-n0/2; | 378 | v->pcmb[j]=pcm; |
326 | vect_add(p, pcm, n0); | 379 | } |
327 | v->pcmb[j]=p; | 380 | else |
328 | }else{ | 381 | { |
329 | /* small/small */ | 382 | /* small/large */ |
330 | ogg_int32_t *pcm=v->pcm[j]+prevCenter; | 383 | p += (n1-n0)/2; |
331 | ogg_int32_t *p=vb->pcm[j]; | 384 | vect_add_left_right(p,pcm,n0); |
332 | vect_add(p, pcm, n0); | 385 | v->pcmb[j]=p; |
333 | v->pcmb[j]=p; | ||
334 | } | 386 | } |
335 | } | 387 | } |
336 | 388 | } | |
337 | /* the copy section */ | 389 | |
390 | /* the copy section */ | ||
391 | if(iram_pcm_doublebuffer) | ||
392 | { | ||
393 | /* just flip the pointers over as we have a double buffer in iram */ | ||
394 | ogg_int32_t *p; | ||
395 | p=v->pcm[0]; | ||
396 | v->pcm[0]=vb->pcm[0]; | ||
397 | vb->pcm[0] = p; | ||
398 | p=v->pcm[1]; | ||
399 | v->pcm[1]=vb->pcm[1]; | ||
400 | vb->pcm[1] = p; | ||
401 | } | ||
402 | else | ||
403 | { | ||
404 | for(j=0;j<vi->channels;j++) | ||
338 | { | 405 | { |
339 | ogg_int32_t *pcm=v->pcm[j]+thisCenter; | 406 | /* at best only vb->pcm is in iram, and that's where we do the |
340 | ogg_int32_t *p=vb->pcm[j]+n; | 407 | synthesis, so we copy out the right-hand subframe of last |
341 | vect_copy(pcm, p, n); | 408 | synthesis into (noniram) local buffer so we can still do |
409 | synth in iram */ | ||
410 | vect_copy(v->pcm[j]+v->centerW, vb->pcm[j]+n, n); | ||
342 | } | 411 | } |
343 | } | 412 | } |
344 | 413 | ||
345 | if(v->centerW) | ||
346 | v->centerW=0; | ||
347 | else | ||
348 | v->centerW=n1; | ||
349 | |||
350 | /* deal with initial packet state; we do this using the explicit | 414 | /* deal with initial packet state; we do this using the explicit |
351 | pcm_returned==-1 flag otherwise we're sensitive to first block | 415 | pcm_returned==-1 flag otherwise we're sensitive to first block |
352 | being short or long */ | 416 | being short or long */ |
353 | 417 | ||
354 | if(v->pcm_returned==-1){ | 418 | if(v->pcm_returned==-1){ |
355 | v->pcm_returned=thisCenter; | 419 | v->pcm_returned=0; |
356 | v->pcm_current=thisCenter; | 420 | v->pcm_current=0; |
357 | }else{ | 421 | }else{ |
358 | v->pcm_returned=0; | 422 | v->pcm_returned=0; |
359 | v->pcm_current=ci->blocksizes[v->lW]/4+ci->blocksizes[v->W]/4; | 423 | v->pcm_current=(n+ln)/2; |
360 | } | 424 | } |
361 | 425 | ||
362 | } | 426 | } |
@@ -375,7 +439,7 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){ | |||
375 | if(b->sample_count==-1){ | 439 | if(b->sample_count==-1){ |
376 | b->sample_count=0; | 440 | b->sample_count=0; |
377 | }else{ | 441 | }else{ |
378 | b->sample_count+=ci->blocksizes[v->lW]/4+ci->blocksizes[v->W]/4; | 442 | b->sample_count+=(n+ln)/2; |
379 | } | 443 | } |
380 | 444 | ||
381 | if(v->granulepos==-1){ | 445 | if(v->granulepos==-1){ |
@@ -406,7 +470,7 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){ | |||
406 | 470 | ||
407 | } | 471 | } |
408 | }else{ | 472 | }else{ |
409 | v->granulepos+=ci->blocksizes[v->lW]/4+ci->blocksizes[v->W]/4; | 473 | v->granulepos+=(n+ln)/2; |
410 | if(vb->granulepos!=-1 && v->granulepos!=vb->granulepos){ | 474 | if(vb->granulepos!=-1 && v->granulepos!=vb->granulepos){ |
411 | 475 | ||
412 | if(v->granulepos>vb->granulepos){ | 476 | if(v->granulepos>vb->granulepos){ |