summary | refs | log | tree | commit | diff
path: root/apps/dsp.c
diff options
context:
space:
mode:
author: Thom Johansen <thomj@rockbox.org> 2005-11-15 10:05:01 +0000
committer: Thom Johansen <thomj@rockbox.org> 2005-11-15 10:05:01 +0000
commit: df37db99ce020a420d80dcf3499a0c3de6904103 (patch)
tree: 18ff114a22201d6a158701f8a49c43608d486aba /apps/dsp.c
parent: 4202df0549c885af9990bcf7e3b2f2b2d01fea17 (diff)
download: rockbox-df37db99ce020a420d80dcf3499a0c3de6904103.tar.gz
download: rockbox-df37db99ce020a420d80dcf3499a0c3de6904103.zip
Added Coldfire ASM optimised crossfeed function. Have not measured speed improvement, but it is faster. Also moved activation check for crossfeed out of function to avoid a function call if it's not activated.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@7891 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/dsp.c')
-rw-r--r-- apps/dsp.c 171
1 files changed, 124 insertions, 47 deletions
diff --git a/apps/dsp.c b/apps/dsp.c
index 0ccef679d0..15205e9fb1 100644
--- a/apps/dsp.c
+++ b/apps/dsp.c
@@ -169,7 +169,7 @@ struct crossfeed_data
169static struct dsp_config dsp_conf[2] IBSS_ATTR; 169static struct dsp_config dsp_conf[2] IBSS_ATTR;
170static struct dither_data dither_data[2] IBSS_ATTR; 170static struct dither_data dither_data[2] IBSS_ATTR;
171static struct resample_data resample_data[2][2] IBSS_ATTR; 171static struct resample_data resample_data[2][2] IBSS_ATTR;
172static struct crossfeed_data crossfeed_data IBSS_ATTR; 172struct crossfeed_data crossfeed_data IBSS_ATTR;
173 173
174extern int current_codec; 174extern int current_codec;
175struct dsp_config *dsp; 175struct dsp_config *dsp;
@@ -446,71 +446,147 @@ static long dither_sample(long sample, long bias, long mask,
446 * the src array if gain was applied. 446 * the src array if gain was applied.
447 * Note that this must be called before the resampler. 447 * Note that this must be called before the resampler.
448 */ 448 */
449#if defined(CPU_COLDFIRE) && !defined(SIMULATOR)
450static const long crossfeed_coefs[6] ICONST_ATTR = {
451 LOW, LOW_COMP, HIGH_NEG, HIGH_COMP, ATT, ATT_COMP
452};
453
449static void apply_crossfeed(long* src[], int count) 454static void apply_crossfeed(long* src[], int count)
450{ 455{
456 asm volatile (
457 "lea.l crossfeed_data, %%a1 \n"
458 "lea.l (16, %%a1), %%a0 \n"
459 "movem.l (%%a1), %%d0-%%d3 \n"
460 "move.l (120, %%a1), %%d4 \n"
461 /* fetch left, right, LOW and LOW_COMP for first iteration */
462 "move.l (%[src0]), %%d5 \n"
463 "move.l (%[src1]), %%d6 \n"
464 "move.l (%[coef])+, %%a1 \n"
465 "move.l (%[coef])+, %%a2 \n"
466 /* Register usage in loop:
467 * a0 = &delay[0][0], a1 & a2 = coefs
468 * d0 = low_left, d1 = low_right,
469 * d2 = high_left, d3 = high_right,
470 * d4 = delay line index,
471 * d5 = src[0][i], d6 = src[1][i].
472 * The rest are described in asm constraint list.
473 */
474 ".cfloop:"
475 /* LOW*low_left + LOW_COMP*left */
476 "mac.l %%a1, %%d0, %%acc0 \n"
477 "mac.l %%a2, %%d5, %%acc0 \n"
478 /* LOW*low_right + LOW_COMP*right */
479 "mac.l %%a1, %%d1, (%[coef])+, %%a1, %%acc1 \n" /* a1 = HIGH_NEG */
480 "mac.l %%a2, %%d6, (%[coef])+, %%a2, %%acc1 \n" /* a2 = HIGH_COMP */
481 "movclr.l %%acc0, %%d0 \n" /* get low_left */
482 "movclr.l %%acc1, %%d1 \n" /* get low_right */
483 /* HIGH_NEG*high_left + HIGH_COMP*left */
484 "mac.l %%a1, %%d2, %%acc0 \n"
485 "mac.l %%a2, %%d5, %%acc0 \n"
486 /* HIGH_NEG*high_right + HIGH_COMP*right */
487 "mac.l %%a1, %%d3, (%[coef])+, %%a1, %%acc1 \n" /* a1 = ATT */
488 "mac.l %%a2, %%d6, (%[coef])+, %%a2, %%acc1 \n" /* a2 = ATT_COMP */
489 "lea.l (-6*4, %[coef]), %[coef] \n" /* coef = &coefs[0] */
490 "move.l (%%a0, %%d4*4), %%a3 \n" /* a3=delay[0][idx] */
491 "move.l (52, %%a0, %%d4*4), %%d5 \n" /* d5=delay[1][idx] */
492 "movclr.l %%acc0, %%d2 \n" /* get high_left */
493 "movclr.l %%acc1, %%d3 \n" /* get high_right */
494 /* ATT*delay_r + ATT_COMP*high_left */
495 "mac.l %%a1, %%d5, (4, %[src0]), %%d5, %%acc0\n" /* d5 = src[0][i+1] */
496 "mac.l %%a2, %%d2, (4, %[src1]), %%d6, %%acc0\n" /* d6 = src[1][i+1] */
497 /* ATT*delay_l + ATT_COMP*high_right */
498 "mac.l %%a1, %%a3, (%[coef])+, %%a1, %%acc1 \n" /* a1 = LOW */
499 "mac.l %%a2, %%d3, (%[coef])+, %%a2, %%acc1 \n" /* a2 = LOW_COMP */
500
501 /* save crossfed samples to output */
502 "movclr.l %%acc0, %%a3 \n"
503 "move.l %%a3, (%[src0])+ \n" /* src[0][i++] = out_l */
504 "movclr.l %%acc1, %%a3 \n"
505 "move.l %%a3, (%[src1])+ \n" /* src[1][i++] = out_r */
506 "move.l %%d0, (%%a0, %%d4*4) \n" /* delay[0][index] = low_left */
507 "move.l %%d1, (52, %%a0, %%d4*4)\n" /* delay[1][index] = low_right */
508 "addq.l #1, %%d4 \n" /* index++ */
509 "cmp.l #13, %%d4 \n" /* if (index >= 13) { */
510 "jlt .nowrap \n"
511 "clr.l %%d4 \n" /* index = 0 */
512 ".nowrap: \n" /* } */
513 "subq.l #1, %[count] \n"
514 "jne .cfloop \n"
515 /* save data back to struct */
516 "lea.l crossfeed_data, %%a1 \n"
517 "movem.l %%d0-%%d3, (%%a1) \n"
518 "move.l %%d4, (120, %%a1) \n"
519 /* NOTE: We _just_ have enough registers for our use here, clobber just
520 one more and GCC will fail. */
521 :
522 : [count] "d" (count),
523 [src0] "a" (src[0]), [src1] "a" (src[1]), [coef] "a" (crossfeed_coefs)
524 : "d0", "d1", "d2", "d3", "d4", "d5", "d6",
525 "a0", "a1", "a2", "a3"
526 );
527}
528#else
529static void apply_crossfeed(long* src[], int count)
530{
531 long a; /* accumulator */
451 532
452 if (dsp->crossfeed_enabled && src[0] != src[1]) 533 long low_left = crossfeed_data.lowpass[0];
453 { 534 long low_right = crossfeed_data.lowpass[1];
454 long a; /* accumulator */ 535 long high_left = crossfeed_data.highpass[0];
455 536 long high_right = crossfeed_data.highpass[1];
456 long low_left = crossfeed_data.lowpass[0]; 537 unsigned int index = crossfeed_data.index;
457 long low_right = crossfeed_data.lowpass[1];
458 long high_left = crossfeed_data.highpass[0];
459 long high_right = crossfeed_data.highpass[1];
460 unsigned int index = crossfeed_data.index;
461
462 long left, right;
463
464 long * delay_l = crossfeed_data.delay[0];
465 long * delay_r = crossfeed_data.delay[1];
466 538
467 int i; 539 long left, right;
468 540
469 for (i = 0; i < count; i++) 541 long * delay_l = crossfeed_data.delay[0];
470 { 542 long * delay_r = crossfeed_data.delay[1];
471 /* use a low-pass filter on the signal */
472 left = src[0][i];
473 right = src[1][i];
474 543
475 ACC_INIT(a, LOW, low_left); ACC(a, LOW_COMP, left); 544 int i;
476 low_left = GET_ACC(a);
477 545
478 ACC_INIT(a, LOW, low_right); ACC(a, LOW_COMP, right); 546 for (i = 0; i < count; i++)
479 low_right = GET_ACC(a); 547 {
548 /* use a low-pass filter on the signal */
549 left = src[0][i];
550 right = src[1][i];
480 551
481 /* use a high-pass filter on the signal */ 552 ACC_INIT(a, LOW, low_left); ACC(a, LOW_COMP, left);
553 low_left = GET_ACC(a);
482 554
483 ACC_INIT(a, HIGH_NEG, high_left); ACC(a, HIGH_COMP, left); 555 ACC_INIT(a, LOW, low_right); ACC(a, LOW_COMP, right);
484 high_left = GET_ACC(a); 556 low_right = GET_ACC(a);
485 557
486 ACC_INIT(a, HIGH_NEG, high_right); ACC(a, HIGH_COMP, right); 558 /* use a high-pass filter on the signal */
487 high_right = GET_ACC(a);
488 559
489 /* New data is the high-passed signal + delayed and attenuated 560 ACC_INIT(a, HIGH_NEG, high_left); ACC(a, HIGH_COMP, left);
490 * low-passed signal from the other channel */ 561 high_left = GET_ACC(a);
491 562
492 ACC_INIT(a, ATT, delay_r[index]); ACC(a, ATT_COMP, high_left); 563 ACC_INIT(a, HIGH_NEG, high_right); ACC(a, HIGH_COMP, right);
493 src[0][i] = GET_ACC(a); 564 high_right = GET_ACC(a);
494 565
495 ACC_INIT(a, ATT, delay_l[index]); ACC(a, ATT_COMP, high_right); 566 /* New data is the high-passed signal + delayed and attenuated
496 src[1][i] = GET_ACC(a); 567 * low-passed signal from the other channel */
497 568
498 /* Store the low-passed signal in the ringbuffer */ 569 ACC_INIT(a, ATT, delay_r[index]); ACC(a, ATT_COMP, high_left);
570 src[0][i] = GET_ACC(a);
499 571
500 delay_l[index] = low_left; 572 ACC_INIT(a, ATT, delay_l[index]); ACC(a, ATT_COMP, high_right);
501 delay_r[index] = low_right; 573 src[1][i] = GET_ACC(a);
502 574
503 index = (index + 1) % 13; 575 /* Store the low-passed signal in the ringbuffer */
504 }
505 576
506 crossfeed_data.index = index; 577 delay_l[index] = low_left;
507 crossfeed_data.lowpass[0] = low_left; 578 delay_r[index] = low_right;
508 crossfeed_data.lowpass[1] = low_right;
509 crossfeed_data.highpass[0] = high_left;
510 crossfeed_data.highpass[1] = high_right;
511 579
580 index = (index + 1) % 13;
512 } 581 }
582
583 crossfeed_data.index = index;
584 crossfeed_data.lowpass[0] = low_left;
585 crossfeed_data.lowpass[1] = low_right;
586 crossfeed_data.highpass[0] = high_left;
587 crossfeed_data.highpass[1] = high_right;
513} 588}
589#endif
514 590
515/* Apply a constant gain to the samples (e.g., for ReplayGain). May update 591/* Apply a constant gain to the samples (e.g., for ReplayGain). May update
516 * the src array if gain was applied. 592 * the src array if gain was applied.
@@ -615,7 +691,8 @@ long dsp_process(char* dst, char* src[], long size)
615 size -= samples; 691 size -= samples;
616 apply_gain(tmp, samples); 692 apply_gain(tmp, samples);
617 samples = resample(tmp, samples); 693 samples = resample(tmp, samples);
618 apply_crossfeed(tmp, samples); 694 if (dsp->crossfeed_enabled && dsp->stereo_mode != STEREO_MONO)
695 apply_crossfeed(tmp, samples);
619 write_samples((short*) dst, tmp, samples); 696 write_samples((short*) dst, tmp, samples);
620 written += samples; 697 written += samples;
621 dst += samples * sizeof(short) * 2; 698 dst += samples * sizeof(short) * 2;