summaryrefslogtreecommitdiff
path: root/apps/codecs/spc/spc_dsp.c
diff options
context:
space:
mode:
Diffstat (limited to 'apps/codecs/spc/spc_dsp.c')
-rw-r--r--apps/codecs/spc/spc_dsp.c232
1 files changed, 168 insertions, 64 deletions
diff --git a/apps/codecs/spc/spc_dsp.c b/apps/codecs/spc/spc_dsp.c
index 8881788cf1..19986fd8a8 100644
--- a/apps/codecs/spc/spc_dsp.c
+++ b/apps/codecs/spc/spc_dsp.c
@@ -25,14 +25,13 @@
25#include "spc_codec.h" 25#include "spc_codec.h"
26#include "spc_profiler.h" 26#include "spc_profiler.h"
27 27
28#ifdef CPU_COLDFIRE 28#if defined(CPU_COLDFIRE) || defined (CPU_ARM)
29static int32_t fir_buf[FIR_BUF_HALF] 29int32_t fir_buf[FIR_BUF_CNT]
30 __attribute__ ((aligned (FIR_BUF_SIZE*2))) IBSS_ATTR; 30 __attribute__ ((aligned (FIR_BUF_ALIGN*1))) IBSS_ATTR;
31#endif 31#endif
32
33#if SPC_BRRCACHE 32#if SPC_BRRCACHE
34/* a little extra for samples that go past end */ 33/* a little extra for samples that go past end */
35int16_t BRRcache [0x20000 + 32]; 34int16_t BRRcache [BRR_CACHE_SIZE];
36#endif 35#endif
37 36
38void DSP_write( struct Spc_Dsp* this, int i, int data ) 37void DSP_write( struct Spc_Dsp* this, int i, int data )
@@ -58,11 +57,12 @@ void DSP_write( struct Spc_Dsp* this, int i, int data )
58 57
59/* if ( n < -32768 ) out = -32768; */ 58/* if ( n < -32768 ) out = -32768; */
60/* if ( n > 32767 ) out = 32767; */ 59/* if ( n > 32767 ) out = 32767; */
61#define CLAMP16( n, out )\ 60#define CLAMP16( n ) \
62{\ 61({ \
63 if ( (int16_t) n != n )\ 62 if ( (int16_t) n != n ) \
64 out = 0x7FFF ^ (n >> 31);\ 63 n = 0x7FFF ^ (n >> 31); \
65} 64 n; \
65})
66 66
67#if SPC_BRRCACHE 67#if SPC_BRRCACHE
68static void decode_brr( struct Spc_Dsp* this, unsigned start_addr, 68static void decode_brr( struct Spc_Dsp* this, unsigned start_addr,
@@ -181,7 +181,7 @@ static void decode_brr( struct Spc_Dsp* this, unsigned start_addr,
181 smp2 = smp1; 181 smp2 = smp1;
182 } 182 }
183 183
184 CLAMP16( delta, delta ); 184 delta = CLAMP16( delta );
185 smp1 = (int16_t) (delta * 2); /* sign-extend */ 185 smp1 = (int16_t) (delta * 2); /* sign-extend */
186 } 186 }
187 while ( (offset += 4) != 0 ); 187 while ( (offset += 4) != 0 );
@@ -359,7 +359,7 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
359 #define VOICE_RATE(x) (INT16A(raw_voice->rate) & 0x3FFF) 359 #define VOICE_RATE(x) (INT16A(raw_voice->rate) & 0x3FFF)
360 #define IF_RBE(...) 360 #define IF_RBE(...)
361 #endif /* ROCKBOX_BIG_ENDIAN */ 361 #endif /* ROCKBOX_BIG_ENDIAN */
362 362
363#if !SPC_NOINTERP 363#if !SPC_NOINTERP
364 int const slow_gaussian = (this->r.g.pitch_mods >> 1) | 364 int const slow_gaussian = (this->r.g.pitch_mods >> 1) |
365 this->r.g.noise_enables; 365 this->r.g.noise_enables;
@@ -431,7 +431,7 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
431 431
432 /* Envelope */ 432 /* Envelope */
433 { 433 {
434 int const env_range = 0x800; 434 int const ENV_RANGE = 0x800;
435 int env_mode = voice->env_mode; 435 int env_mode = voice->env_mode;
436 int adsr0 = raw_voice->adsr [0]; 436 int adsr0 = raw_voice->adsr [0];
437 int env_timer; 437 int env_timer;
@@ -482,14 +482,14 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
482 482
483 int envx = voice->envx; 483 int envx = voice->envx;
484 484
485 int const step = env_range / 64; 485 int const step = ENV_RANGE / 64;
486 envx += step; 486 envx += step;
487 if ( t == 15 ) 487 if ( t == 15 )
488 envx += env_range / 2 - step; 488 envx += ENV_RANGE / 2 - step;
489 489
490 if ( envx >= env_range ) 490 if ( envx >= ENV_RANGE )
491 { 491 {
492 envx = env_range - 1; 492 envx = ENV_RANGE - 1;
493 voice->env_mode = state_decay; 493 voice->env_mode = state_decay;
494 } 494 }
495 voice->envx = envx; 495 voice->envx = envx;
@@ -516,7 +516,7 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
516 int mode = t >> 5; 516 int mode = t >> 5;
517 if ( mode <= 5 ) /* decay */ 517 if ( mode <= 5 ) /* decay */
518 { 518 {
519 int step = env_range / 64; 519 int step = ENV_RANGE / 64;
520 if ( mode == 5 ) /* exponential */ 520 if ( mode == 5 ) /* exponential */
521 { 521 {
522 envx--; /* envx *= 255 / 256 */ 522 envx--; /* envx *= 255 / 256 */
@@ -531,14 +531,14 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
531 } 531 }
532 else /* attack */ 532 else /* attack */
533 { 533 {
534 int const step = env_range / 64; 534 int const step = ENV_RANGE / 64;
535 envx += step; 535 envx += step;
536 if ( mode == 7 && 536 if ( mode == 7 &&
537 envx >= env_range * 3 / 4 + step ) 537 envx >= ENV_RANGE * 3 / 4 + step )
538 envx += env_range / 256 - step; 538 envx += ENV_RANGE / 256 - step;
539 539
540 if ( envx >= env_range ) 540 if ( envx >= ENV_RANGE )
541 envx = env_range - 1; 541 envx = ENV_RANGE - 1;
542 } 542 }
543 voice->envx = envx; 543 voice->envx = envx;
544 /* TODO: should this be 8? */ 544 /* TODO: should this be 8? */
@@ -550,7 +550,7 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
550 else /* state_release */ 550 else /* state_release */
551 { 551 {
552 int envx = voice->envx; 552 int envx = voice->envx;
553 if ( (envx -= env_range / 256) > 0 ) 553 if ( (envx -= ENV_RANGE / 256) > 0 )
554 { 554 {
555 voice->envx = envx; 555 voice->envx = envx;
556 raw_voice->envx = envx >> 8; 556 raw_voice->envx = envx >> 8;
@@ -683,7 +683,7 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
683 smp2 = smp1; 683 smp2 = smp1;
684 } 684 }
685 685
686 CLAMP16( delta, delta ); 686 delta = CLAMP16( delta );
687 smp1 = (int16_t) (delta * 2); /* sign-extend */ 687 smp1 = (int16_t) (delta * 2); /* sign-extend */
688 } 688 }
689 while ( (offset += 4) != 0 ); 689 while ( (offset += 4) != 0 );
@@ -778,7 +778,7 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
778 output = (output + rev [1] * interp [2]) >> 12; 778 output = (output + rev [1] * interp [2]) >> 12;
779 output = (int16_t) (output * 2); 779 output = (int16_t) (output * 2);
780 output += ((rev [0] * interp [3]) >> 12) * 2; 780 output += ((rev [0] * interp [3]) >> 12) * 2;
781 CLAMP16( output, output ); 781 output = CLAMP16( output );
782 } 782 }
783 output = (output * voice->envx) >> 11 & ~1; 783 output = (output * voice->envx) >> 11 & ~1;
784 784
@@ -788,7 +788,7 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
788 prev_outx = output; 788 prev_outx = output;
789 raw_voice->outx = (int8_t) (output >> 8); 789 raw_voice->outx = (int8_t) (output >> 8);
790 } 790 }
791 #else 791 #else /* SPCNOINTERP */
792 /* two-point linear interpolation */ 792 /* two-point linear interpolation */
793 #ifdef CPU_COLDFIRE 793 #ifdef CPU_COLDFIRE
794 int amp_0 = (int16_t)this->noise; 794 int amp_0 = (int16_t)this->noise;
@@ -822,7 +822,7 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
822 /* output = y0 + (result >> 12) */ 822 /* output = y0 + (result >> 12) */
823 "asr.l %[sh], %[y1] \r\n" 823 "asr.l %[sh], %[y1] \r\n"
824 "add.l %[y0], %[y1] \r\n" 824 "add.l %[y0], %[y1] \r\n"
825 : [f]"+&d"(f), [y0]"=&a"(y0), [y1]"=&d"(amp_0) 825 : [f]"+d"(f), [y0]"=&a"(y0), [y1]"=&d"(amp_0)
826 : [s]"a"(voice->samples), [sh]"d"(12) 826 : [s]"a"(voice->samples), [sh]"d"(12)
827 ); 827 );
828 } 828 }
@@ -861,17 +861,49 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
861 "movclr.l %%acc1, %[amp_1] \r\n" 861 "movclr.l %%acc1, %[amp_1] \r\n"
862 : [amp_0]"=r"(amp_0), [amp_1]"=r"(amp_1) 862 : [amp_0]"=r"(amp_0), [amp_1]"=r"(amp_1)
863 ); 863 );
864 #else 864 #elif defined (CPU_ARM)
865 int amp_0, amp_1;
866
867 if ( (this->r.g.noise_enables & vbit) != 0 ) {
868 amp_0 = *(int16_t *)&this->noise;
869 } else {
870 uint32_t f = voice->position;
871 amp_0 = (uint32_t)voice->samples;
865 872
866 /* Try this one out on ARM and see - similar to above but the asm 873 asm volatile(
867 on coldfire removes a redundant register load worth 1 or 2%; 874 "mov %[y1], %[f], lsr #12 \r\n"
868 switching to loading two samples at once may help too. That's 875 "eor %[f], %[f], %[y1], lsl #12 \r\n"
869 done above and while 6 to 7% faster on cf over two 16 bit loads 876 "add %[y1], %[y0], %[y1], lsl #1 \r\n"
870 it makes it endian dependant. 877 "ldrsh %[y0], [%[y1], #2] \r\n"
871 878 "ldrsh %[y1], [%[y1], #4] \r\n"
872 measured small improvement (~1.5%) - hcs 879 "sub %[y1], %[y1], %[y0] \r\n"
873 */ 880 "mul %[f], %[y1], %[f] \r\n"
881 "add %[y0], %[y0], %[f], asr #12 \r\n"
882 : [f]"+r"(f), [y0]"+r"(amp_0), [y1]"=&r"(amp_1)
883 );
884 }
885
886 voice->position += rate;
887
888 asm volatile(
889 "mul %[amp_1], %[amp_0], %[envx] \r\n"
890 "mov %[amp_0], %[amp_1], asr #11 \r\n"
891 "mov %[amp_1], %[amp_0], asr #8 \r\n"
892 : [amp_0]"+r"(amp_0), [amp_1]"=&r"(amp_1)
893 : [envx]"r"(voice->envx)
894 );
895
896 prev_outx = amp_0;
897 raw_voice->outx = (int8_t)amp_1;
874 898
899 asm volatile(
900 "mul %[amp_1], %[amp_0], %[vol_1] \r\n"
901 "mul %[amp_0], %[vol_0], %[amp_0] \r\n"
902 : [amp_0]"+r"(amp_0), [amp_1]"+r"(amp_1)
903 : [vol_0]"r"((int)voice->volume[0]),
904 [vol_1]"r"((int)voice->volume[1])
905 );
906 #else /* Unoptimized CPU */
875 int output; 907 int output;
876 908
877 if ( (this->r.g.noise_enables & vbit) == 0 ) 909 if ( (this->r.g.noise_enables & vbit) == 0 )
@@ -884,19 +916,7 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
884 } 916 }
885 917
886 voice->position += rate; 918 voice->position += rate;
887 919
888 /* old version */
889#if 0
890 int fraction = voice->position & 0xFFF;
891 short const* const pos = voice->samples + (voice->position >> 12);
892 voice->position += rate;
893 int output =
894 (pos [2] * fraction + pos [1] * (0x1000 - fraction)) >> 12;
895 /* no interpolation (hardly faster, and crappy sounding) */
896 /*int output = pos [0];*/
897 if ( this->r.g.noise_enables & vbit )
898 output = *(int16_t*) &this->noise;
899#endif
900 output = (output * voice->envx) >> 11; 920 output = (output * voice->envx) >> 11;
901 921
902 /* duplicated here to give compiler more to run in parallel */ 922 /* duplicated here to give compiler more to run in parallel */
@@ -905,8 +925,8 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
905 925
906 prev_outx = output; 926 prev_outx = output;
907 raw_voice->outx = (int8_t) (output >> 8); 927 raw_voice->outx = (int8_t) (output >> 8);
908 #endif /* CPU_COLDFIRE */ 928 #endif /* CPU_* */
909 #endif 929 #endif /* SPCNOINTERP */
910 930
911 #if SPC_BRRCACHE 931 #if SPC_BRRCACHE
912 if ( voice->position >= voice->wave_end ) 932 if ( voice->position >= voice->wave_end )
@@ -1033,7 +1053,7 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
1033 "or.l %[sh], %[e0] \r\n" 1053 "or.l %[sh], %[e0] \r\n"
1034 /* save final feedback into echo buffer */ 1054 /* save final feedback into echo buffer */
1035 "move.l %[e0], (%[echo_ptr]) \r\n" 1055 "move.l %[e0], (%[echo_ptr]) \r\n"
1036 : [e0]"+&d"(echo_0), [e1]"+&d"(echo_1) 1056 : [e0]"+d"(echo_0), [e1]"+d"(echo_1)
1037 : [out_0]"r"(out_0), [out_1]"r"(out_1), 1057 : [out_0]"r"(out_0), [out_1]"r"(out_1),
1038 [ef]"r"((int)this->r.g.echo_feedback), 1058 [ef]"r"((int)this->r.g.echo_feedback),
1039 [echo_ptr]"a"((int32_t *)echo_ptr), 1059 [echo_ptr]"a"((int32_t *)echo_ptr),
@@ -1056,7 +1076,88 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
1056 out_buf [ 0] = out_0; 1076 out_buf [ 0] = out_0;
1057 out_buf [WAV_CHUNK_SIZE] = out_1; 1077 out_buf [WAV_CHUNK_SIZE] = out_1;
1058 out_buf ++; 1078 out_buf ++;
1059 #else /* !CPU_COLDFIRE */ 1079 #elif defined (CPU_ARM)
1080 /* Read feedback from echo buffer */
1081 int echo_pos = this->echo_pos;
1082 uint8_t* const echo_ptr = RAM +
1083 ((this->r.g.echo_page * 0x100 + echo_pos) & 0xFFFF);
1084 echo_pos += 4;
1085 if ( echo_pos >= (this->r.g.echo_delay & 15) * 0x800 )
1086 echo_pos = 0;
1087 this->echo_pos = echo_pos;
1088
1089 int fb_0 = GET_LE16SA( echo_ptr );
1090 int fb_1 = GET_LE16SA( echo_ptr + 2 );
1091
1092 /* Keep last 8 samples */
1093 int32_t *fir_ptr = this->fir_ptr;
1094
1095 /* Apply FIR */
1096 asm volatile (
1097 "str %[fb_0], [%[fir_p]], #4 \r\n"
1098 "str %[fb_1], [%[fir_p]], #4 \r\n"
1099 /* duplicate at +8 eliminates wrap checking below */
1100 "str %[fb_0], [%[fir_p], #56] \r\n"
1101 "str %[fb_1], [%[fir_p], #60] \r\n"
1102 : [fir_p]"+r"(fir_ptr)
1103 : [fb_0]"r"(fb_0), [fb_1]"r"(fb_1)
1104 );
1105
1106 this->fir_ptr = (int32_t *)((intptr_t)fir_ptr & FIR_BUF_MASK);
1107 int32_t *fir_coeff = this->fir_coeff;
1108
1109 asm volatile (
1110 "ldmia %[fir_c]!, { r0-r1 } \r\n"
1111 "ldmia %[fir_p]!, { r4-r5 } \r\n"
1112 "mul %[fb_0], r0, %[fb_0] \r\n"
1113 "mul %[fb_1], r0, %[fb_1] \r\n"
1114 "mla %[fb_0], r4, r1, %[fb_0] \r\n"
1115 "mla %[fb_1], r5, r1, %[fb_1] \r\n"
1116 "ldmia %[fir_c]!, { r0-r1 } \r\n"
1117 "ldmia %[fir_p]!, { r2-r5 } \r\n"
1118 "mla %[fb_0], r2, r0, %[fb_0] \r\n"
1119 "mla %[fb_1], r3, r0, %[fb_1] \r\n"
1120 "mla %[fb_0], r4, r1, %[fb_0] \r\n"
1121 "mla %[fb_1], r5, r1, %[fb_1] \r\n"
1122 "ldmia %[fir_c]!, { r0-r1 } \r\n"
1123 "ldmia %[fir_p]!, { r2-r5 } \r\n"
1124 "mla %[fb_0], r2, r0, %[fb_0] \r\n"
1125 "mla %[fb_1], r3, r0, %[fb_1] \r\n"
1126 "mla %[fb_0], r4, r1, %[fb_0] \r\n"
1127 "mla %[fb_1], r5, r1, %[fb_1] \r\n"
1128 "ldmia %[fir_c]!, { r0-r1 } \r\n"
1129 "ldmia %[fir_p]!, { r2-r5 } \r\n"
1130 "mla %[fb_0], r2, r0, %[fb_0] \r\n"
1131 "mla %[fb_1], r3, r0, %[fb_1] \r\n"
1132 "mla %[fb_0], r4, r1, %[fb_0] \r\n"
1133 "mla %[fb_1], r5, r1, %[fb_1] \r\n"
1134 : [fb_0]"+r"(fb_0), [fb_1]"+r"(fb_1),
1135 [fir_p]"+r"(fir_ptr), [fir_c]"+r"(fir_coeff)
1136 :
1137 : "r0", "r1", "r2", "r3", "r4", "r5"
1138 );
1139
1140 /* Generate output */
1141 int amp_0 = (chans_0 * global_vol_0 + fb_0 * this->r.g.echo_volume_0)
1142 >> global_muting;
1143 int amp_1 = (chans_1 * global_vol_1 + fb_1 * this->r.g.echo_volume_1)
1144 >> global_muting;
1145
1146 out_buf [ 0] = amp_0;
1147 out_buf [WAV_CHUNK_SIZE] = amp_1;
1148 out_buf ++;
1149
1150 if ( !(this->r.g.flags & 0x20) )
1151 {
1152 /* Feedback into echo buffer */
1153 int e0 = (echo_0 >> 7) + ((fb_0 * this->r.g.echo_feedback) >> 14);
1154 int e1 = (echo_1 >> 7) + ((fb_1 * this->r.g.echo_feedback) >> 14);
1155 e0 = CLAMP16( e0 );
1156 SET_LE16A( echo_ptr , e0 );
1157 e1 = CLAMP16( e1 );
1158 SET_LE16A( echo_ptr + 2, e1 );
1159 }
1160 #else /* Unoptimized CPU */
1060 /* Read feedback from echo buffer */ 1161 /* Read feedback from echo buffer */
1061 int echo_pos = this->echo_pos; 1162 int echo_pos = this->echo_pos;
1062 uint8_t* const echo_ptr = RAM + 1163 uint8_t* const echo_ptr = RAM +
@@ -1102,25 +1203,25 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
1102 out_buf [WAV_CHUNK_SIZE] = amp_1; 1203 out_buf [WAV_CHUNK_SIZE] = amp_1;
1103 out_buf ++; 1204 out_buf ++;
1104 1205
1105 /* Feedback into echo buffer */
1106 int e0 = (echo_0 >> 7) + ((fb_0 * this->r.g.echo_feedback) >> 14);
1107 int e1 = (echo_1 >> 7) + ((fb_1 * this->r.g.echo_feedback) >> 14);
1108 if ( !(this->r.g.flags & 0x20) ) 1206 if ( !(this->r.g.flags & 0x20) )
1109 { 1207 {
1110 CLAMP16( e0, e0 ); 1208 /* Feedback into echo buffer */
1209 int e0 = (echo_0 >> 7) + ((fb_0 * this->r.g.echo_feedback) >> 14);
1210 int e1 = (echo_1 >> 7) + ((fb_1 * this->r.g.echo_feedback) >> 14);
1211 e0 = CLAMP16( e0 );
1111 SET_LE16A( echo_ptr , e0 ); 1212 SET_LE16A( echo_ptr , e0 );
1112 CLAMP16( e1, e1 ); 1213 e1 = CLAMP16( e1 );
1113 SET_LE16A( echo_ptr + 2, e1 ); 1214 SET_LE16A( echo_ptr + 2, e1 );
1114 } 1215 }
1115 #endif /* CPU_COLDFIRE */ 1216 #endif /* CPU_* */
1116 #else 1217 #else /* SPCNOECHO == 1*/
1117 /* Generate output */ 1218 /* Generate output */
1118 int amp_0 = (chans_0 * global_vol_0) >> global_muting; 1219 int amp_0 = (chans_0 * global_vol_0) >> global_muting;
1119 int amp_1 = (chans_1 * global_vol_1) >> global_muting; 1220 int amp_1 = (chans_1 * global_vol_1) >> global_muting;
1120 out_buf [ 0] = amp_0; 1221 out_buf [ 0] = amp_0;
1121 out_buf [WAV_CHUNK_SIZE] = amp_1; 1222 out_buf [WAV_CHUNK_SIZE] = amp_1;
1122 out_buf ++; 1223 out_buf ++;
1123 #endif 1224 #endif /* SPCNOECHO */
1124 } 1225 }
1125 while ( --count ); 1226 while ( --count );
1126#if 0 1227#if 0
@@ -1155,10 +1256,13 @@ void DSP_reset( struct Spc_Dsp* this )
1155 this->wave_entry [i].start_addr = -1; 1256 this->wave_entry [i].start_addr = -1;
1156 #endif 1257 #endif
1157 1258
1158#ifdef CPU_COLDFIRE 1259#if defined(CPU_COLDFIRE)
1159 this->fir_ptr = fir_buf; 1260 this->fir_ptr = fir_buf;
1160 this->last_fir_ptr = &fir_buf [7]; 1261 this->last_fir_ptr = &fir_buf [7];
1161 ci->memset( fir_buf, 0, sizeof fir_buf ); 1262 ci->memset( fir_buf, 0, sizeof fir_buf );
1263#elif defined (CPU_ARM)
1264 this->fir_ptr = fir_buf;
1265 ci->memset( fir_buf, 0, sizeof fir_buf );
1162#else 1266#else
1163 this->fir_pos = 0; 1267 this->fir_pos = 0;
1164 ci->memset( this->fir_buf, 0, sizeof this->fir_buf ); 1268 ci->memset( this->fir_buf, 0, sizeof this->fir_buf );