diff options
author | Thom Johansen <thomj@rockbox.org> | 2005-11-15 10:05:01 +0000 |
---|---|---|
committer | Thom Johansen <thomj@rockbox.org> | 2005-11-15 10:05:01 +0000 |
commit | df37db99ce020a420d80dcf3499a0c3de6904103 (patch) | |
tree | 18ff114a22201d6a158701f8a49c43608d486aba | |
parent | 4202df0549c885af9990bcf7e3b2f2b2d01fea17 (diff) | |
download | rockbox-df37db99ce020a420d80dcf3499a0c3de6904103.tar.gz rockbox-df37db99ce020a420d80dcf3499a0c3de6904103.zip |
Added Coldfire ASM optimised crossfeed function. Have not measured speed improvement, but it is faster. Also moved activation check for crossfeed out of function to avoid a function call if it's not activated.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@7891 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- | apps/dsp.c | 171 |
1 files changed, 124 insertions, 47 deletions
diff --git a/apps/dsp.c b/apps/dsp.c index 0ccef679d0..15205e9fb1 100644 --- a/apps/dsp.c +++ b/apps/dsp.c | |||
@@ -169,7 +169,7 @@ struct crossfeed_data | |||
169 | static struct dsp_config dsp_conf[2] IBSS_ATTR; | 169 | static struct dsp_config dsp_conf[2] IBSS_ATTR; |
170 | static struct dither_data dither_data[2] IBSS_ATTR; | 170 | static struct dither_data dither_data[2] IBSS_ATTR; |
171 | static struct resample_data resample_data[2][2] IBSS_ATTR; | 171 | static struct resample_data resample_data[2][2] IBSS_ATTR; |
172 | static struct crossfeed_data crossfeed_data IBSS_ATTR; | 172 | struct crossfeed_data crossfeed_data IBSS_ATTR; |
173 | 173 | ||
174 | extern int current_codec; | 174 | extern int current_codec; |
175 | struct dsp_config *dsp; | 175 | struct dsp_config *dsp; |
@@ -446,71 +446,147 @@ static long dither_sample(long sample, long bias, long mask, | |||
446 | * the src array if gain was applied. | 446 | * the src array if gain was applied. |
447 | * Note that this must be called before the resampler. | 447 | * Note that this must be called before the resampler. |
448 | */ | 448 | */ |
449 | #if defined(CPU_COLDFIRE) && !defined(SIMULATOR) | ||
450 | static const long crossfeed_coefs[6] ICONST_ATTR = { | ||
451 | LOW, LOW_COMP, HIGH_NEG, HIGH_COMP, ATT, ATT_COMP | ||
452 | }; | ||
453 | |||
449 | static void apply_crossfeed(long* src[], int count) | 454 | static void apply_crossfeed(long* src[], int count) |
450 | { | 455 | { |
456 | asm volatile ( | ||
457 | "lea.l crossfeed_data, %%a1 \n" | ||
458 | "lea.l (16, %%a1), %%a0 \n" | ||
459 | "movem.l (%%a1), %%d0-%%d3 \n" | ||
460 | "move.l (120, %%a1), %%d4 \n" | ||
461 | /* fetch left, right, LOW and LOW_COMP for first iteration */ | ||
462 | "move.l (%[src0]), %%d5 \n" | ||
463 | "move.l (%[src1]), %%d6 \n" | ||
464 | "move.l (%[coef])+, %%a1 \n" | ||
465 | "move.l (%[coef])+, %%a2 \n" | ||
466 | /* Register usage in loop: | ||
467 | * a0 = &delay[0][0], a1 & a2 = coefs | ||
468 | * d0 = low_left, d1 = low_right, | ||
469 | * d2 = high_left, d3 = high_right, | ||
470 | * d4 = delay line index, | ||
471 | * d5 = src[0][i], d6 = src[1][i]. | ||
472 | * The rest are described in asm constraint list. | ||
473 | */ | ||
474 | ".cfloop:" | ||
475 | /* LOW*low_left + LOW_COMP*left */ | ||
476 | "mac.l %%a1, %%d0, %%acc0 \n" | ||
477 | "mac.l %%a2, %%d5, %%acc0 \n" | ||
478 | /* LOW*low_right + LOW_COMP*right */ | ||
479 | "mac.l %%a1, %%d1, (%[coef])+, %%a1, %%acc1 \n" /* a1 = HIGH_NEG */ | ||
480 | "mac.l %%a2, %%d6, (%[coef])+, %%a2, %%acc1 \n" /* a2 = HIGH_COMP */ | ||
481 | "movclr.l %%acc0, %%d0 \n" /* get low_left */ | ||
482 | "movclr.l %%acc1, %%d1 \n" /* get low_right */ | ||
483 | /* HIGH_NEG*high_left + HIGH_COMP*left */ | ||
484 | "mac.l %%a1, %%d2, %%acc0 \n" | ||
485 | "mac.l %%a2, %%d5, %%acc0 \n" | ||
486 | /* HIGH_NEG*high_right + HIGH_COMP*right */ | ||
487 | "mac.l %%a1, %%d3, (%[coef])+, %%a1, %%acc1 \n" /* a1 = ATT */ | ||
488 | "mac.l %%a2, %%d6, (%[coef])+, %%a2, %%acc1 \n" /* a2 = ATT_COMP */ | ||
489 | "lea.l (-6*4, %[coef]), %[coef] \n" /* coef = &coefs[0] */ | ||
490 | "move.l (%%a0, %%d4*4), %%a3 \n" /* a3=delay[0][idx] */ | ||
491 | "move.l (52, %%a0, %%d4*4), %%d5 \n" /* d5=delay[1][idx] */ | ||
492 | "movclr.l %%acc0, %%d2 \n" /* get high_left */ | ||
493 | "movclr.l %%acc1, %%d3 \n" /* get high_right */ | ||
494 | /* ATT*delay_r + ATT_COMP*high_left */ | ||
495 | "mac.l %%a1, %%d5, (4, %[src0]), %%d5, %%acc0\n" /* d5 = src[0][i+1] */ | ||
496 | "mac.l %%a2, %%d2, (4, %[src1]), %%d6, %%acc0\n" /* d6 = src[1][i+1] */ | ||
497 | /* ATT*delay_l + ATT_COMP*high_right */ | ||
498 | "mac.l %%a1, %%a3, (%[coef])+, %%a1, %%acc1 \n" /* a1 = LOW */ | ||
499 | "mac.l %%a2, %%d3, (%[coef])+, %%a2, %%acc1 \n" /* a2 = LOW_COMP */ | ||
500 | |||
501 | /* save crossfed samples to output */ | ||
502 | "movclr.l %%acc0, %%a3 \n" | ||
503 | "move.l %%a3, (%[src0])+ \n" /* src[0][i++] = out_l */ | ||
504 | "movclr.l %%acc1, %%a3 \n" | ||
505 | "move.l %%a3, (%[src1])+ \n" /* src[1][i++] = out_r */ | ||
506 | "move.l %%d0, (%%a0, %%d4*4) \n" /* delay[0][index] = low_left */ | ||
507 | "move.l %%d1, (52, %%a0, %%d4*4)\n" /* delay[1][index] = low_right */ | ||
508 | "addq.l #1, %%d4 \n" /* index++ */ | ||
509 | "cmp.l #13, %%d4 \n" /* if (index >= 13) { */ | ||
510 | "jlt .nowrap \n" | ||
511 | "clr.l %%d4 \n" /* index = 0 */ | ||
512 | ".nowrap: \n" /* } */ | ||
513 | "subq.l #1, %[count] \n" | ||
514 | "jne .cfloop \n" | ||
515 | /* save data back to struct */ | ||
516 | "lea.l crossfeed_data, %%a1 \n" | ||
517 | "movem.l %%d0-%%d3, (%%a1) \n" | ||
518 | "move.l %%d4, (120, %%a1) \n" | ||
519 | /* NOTE: We _just_ have enough registers for our use here, clobber just | ||
520 | one more and GCC will fail. */ | ||
521 | : | ||
522 | : [count] "d" (count), | ||
523 | [src0] "a" (src[0]), [src1] "a" (src[1]), [coef] "a" (crossfeed_coefs) | ||
524 | : "d0", "d1", "d2", "d3", "d4", "d5", "d6", | ||
525 | "a0", "a1", "a2", "a3" | ||
526 | ); | ||
527 | } | ||
528 | #else | ||
529 | static void apply_crossfeed(long* src[], int count) | ||
530 | { | ||
531 | long a; /* accumulator */ | ||
451 | 532 | ||
452 | if (dsp->crossfeed_enabled && src[0] != src[1]) | 533 | long low_left = crossfeed_data.lowpass[0]; |
453 | { | 534 | long low_right = crossfeed_data.lowpass[1]; |
454 | long a; /* accumulator */ | 535 | long high_left = crossfeed_data.highpass[0]; |
455 | 536 | long high_right = crossfeed_data.highpass[1]; | |
456 | long low_left = crossfeed_data.lowpass[0]; | 537 | unsigned int index = crossfeed_data.index; |
457 | long low_right = crossfeed_data.lowpass[1]; | ||
458 | long high_left = crossfeed_data.highpass[0]; | ||
459 | long high_right = crossfeed_data.highpass[1]; | ||
460 | unsigned int index = crossfeed_data.index; | ||
461 | |||
462 | long left, right; | ||
463 | |||
464 | long * delay_l = crossfeed_data.delay[0]; | ||
465 | long * delay_r = crossfeed_data.delay[1]; | ||
466 | 538 | ||
467 | int i; | 539 | long left, right; |
468 | 540 | ||
469 | for (i = 0; i < count; i++) | 541 | long * delay_l = crossfeed_data.delay[0]; |
470 | { | 542 | long * delay_r = crossfeed_data.delay[1]; |
471 | /* use a low-pass filter on the signal */ | ||
472 | left = src[0][i]; | ||
473 | right = src[1][i]; | ||
474 | 543 | ||
475 | ACC_INIT(a, LOW, low_left); ACC(a, LOW_COMP, left); | 544 | int i; |
476 | low_left = GET_ACC(a); | ||
477 | 545 | ||
478 | ACC_INIT(a, LOW, low_right); ACC(a, LOW_COMP, right); | 546 | for (i = 0; i < count; i++) |
479 | low_right = GET_ACC(a); | 547 | { |
548 | /* use a low-pass filter on the signal */ | ||
549 | left = src[0][i]; | ||
550 | right = src[1][i]; | ||
480 | 551 | ||
481 | /* use a high-pass filter on the signal */ | 552 | ACC_INIT(a, LOW, low_left); ACC(a, LOW_COMP, left); |
553 | low_left = GET_ACC(a); | ||
482 | 554 | ||
483 | ACC_INIT(a, HIGH_NEG, high_left); ACC(a, HIGH_COMP, left); | 555 | ACC_INIT(a, LOW, low_right); ACC(a, LOW_COMP, right); |
484 | high_left = GET_ACC(a); | 556 | low_right = GET_ACC(a); |
485 | 557 | ||
486 | ACC_INIT(a, HIGH_NEG, high_right); ACC(a, HIGH_COMP, right); | 558 | /* use a high-pass filter on the signal */ |
487 | high_right = GET_ACC(a); | ||
488 | 559 | ||
489 | /* New data is the high-passed signal + delayed and attenuated | 560 | ACC_INIT(a, HIGH_NEG, high_left); ACC(a, HIGH_COMP, left); |
490 | * low-passed signal from the other channel */ | 561 | high_left = GET_ACC(a); |
491 | 562 | ||
492 | ACC_INIT(a, ATT, delay_r[index]); ACC(a, ATT_COMP, high_left); | 563 | ACC_INIT(a, HIGH_NEG, high_right); ACC(a, HIGH_COMP, right); |
493 | src[0][i] = GET_ACC(a); | 564 | high_right = GET_ACC(a); |
494 | 565 | ||
495 | ACC_INIT(a, ATT, delay_l[index]); ACC(a, ATT_COMP, high_right); | 566 | /* New data is the high-passed signal + delayed and attenuated |
496 | src[1][i] = GET_ACC(a); | 567 | * low-passed signal from the other channel */ |
497 | 568 | ||
498 | /* Store the low-passed signal in the ringbuffer */ | 569 | ACC_INIT(a, ATT, delay_r[index]); ACC(a, ATT_COMP, high_left); |
570 | src[0][i] = GET_ACC(a); | ||
499 | 571 | ||
500 | delay_l[index] = low_left; | 572 | ACC_INIT(a, ATT, delay_l[index]); ACC(a, ATT_COMP, high_right); |
501 | delay_r[index] = low_right; | 573 | src[1][i] = GET_ACC(a); |
502 | 574 | ||
503 | index = (index + 1) % 13; | 575 | /* Store the low-passed signal in the ringbuffer */ |
504 | } | ||
505 | 576 | ||
506 | crossfeed_data.index = index; | 577 | delay_l[index] = low_left; |
507 | crossfeed_data.lowpass[0] = low_left; | 578 | delay_r[index] = low_right; |
508 | crossfeed_data.lowpass[1] = low_right; | ||
509 | crossfeed_data.highpass[0] = high_left; | ||
510 | crossfeed_data.highpass[1] = high_right; | ||
511 | 579 | ||
580 | index = (index + 1) % 13; | ||
512 | } | 581 | } |
582 | |||
583 | crossfeed_data.index = index; | ||
584 | crossfeed_data.lowpass[0] = low_left; | ||
585 | crossfeed_data.lowpass[1] = low_right; | ||
586 | crossfeed_data.highpass[0] = high_left; | ||
587 | crossfeed_data.highpass[1] = high_right; | ||
513 | } | 588 | } |
589 | #endif | ||
514 | 590 | ||
515 | /* Apply a constant gain to the samples (e.g., for ReplayGain). May update | 591 | /* Apply a constant gain to the samples (e.g., for ReplayGain). May update |
516 | * the src array if gain was applied. | 592 | * the src array if gain was applied. |
@@ -615,7 +691,8 @@ long dsp_process(char* dst, char* src[], long size) | |||
615 | size -= samples; | 691 | size -= samples; |
616 | apply_gain(tmp, samples); | 692 | apply_gain(tmp, samples); |
617 | samples = resample(tmp, samples); | 693 | samples = resample(tmp, samples); |
618 | apply_crossfeed(tmp, samples); | 694 | if (dsp->crossfeed_enabled && dsp->stereo_mode != STEREO_MONO) |
695 | apply_crossfeed(tmp, samples); | ||
619 | write_samples((short*) dst, tmp, samples); | 696 | write_samples((short*) dst, tmp, samples); |
620 | written += samples; | 697 | written += samples; |
621 | dst += samples * sizeof(short) * 2; | 698 | dst += samples * sizeof(short) * 2; |