diff options
Diffstat (limited to 'apps/codecs/spc/Spc_Dsp.h')
-rw-r--r-- | apps/codecs/spc/Spc_Dsp.h | 211 |
1 files changed, 180 insertions, 31 deletions
diff --git a/apps/codecs/spc/Spc_Dsp.h b/apps/codecs/spc/Spc_Dsp.h index 0cf55dee8a..4d64b2420c 100644 --- a/apps/codecs/spc/Spc_Dsp.h +++ b/apps/codecs/spc/Spc_Dsp.h | |||
@@ -107,6 +107,19 @@ static int16_t BRRcache [0x20000 + 32]; | |||
107 | 107 | ||
108 | enum { fir_buf_half = 8 }; | 108 | enum { fir_buf_half = 8 }; |
109 | 109 | ||
110 | #ifdef CPU_COLDFIRE | ||
111 | /* global because of the large alignment requirement for hardware masking - | ||
112 | * L-R interleaved 16-bit samples for easy loading and mac.w use. | ||
113 | */ | ||
114 | enum | ||
115 | { | ||
116 | fir_buf_size = fir_buf_half * sizeof ( int32_t ), | ||
117 | fir_buf_mask = ~fir_buf_size | ||
118 | }; | ||
119 | int32_t fir_buf[fir_buf_half] | ||
120 | __attribute__ ((aligned (fir_buf_size*2))) IBSS_ATTR; | ||
121 | #endif /* CPU_COLDFIRE */ | ||
122 | |||
110 | struct Spc_Dsp | 123 | struct Spc_Dsp |
111 | { | 124 | { |
112 | union | 125 | union |
@@ -122,11 +135,21 @@ struct Spc_Dsp | |||
122 | int noise_count; | 135 | int noise_count; |
123 | uint16_t noise; /* also read as int16_t */ | 136 | uint16_t noise; /* also read as int16_t */ |
124 | 137 | ||
138 | #ifdef CPU_COLDFIRE | ||
139 | /* circularly hardware masked address */ | ||
140 | int32_t *fir_ptr; | ||
141 | /* wrapped address just behind current position - | ||
142 | allows mac.w to increment and mask fir_ptr */ | ||
143 | int32_t *last_fir_ptr; | ||
144 | /* copy of echo FIR constants as int16_t for use with mac.w */ | ||
145 | int16_t fir_coeff[voice_count]; | ||
146 | #else | ||
125 | /* fir_buf [i + 8] == fir_buf [i], to avoid wrap checking in FIR code */ | 147 | /* fir_buf [i + 8] == fir_buf [i], to avoid wrap checking in FIR code */ |
126 | int fir_pos; /* (0 to 7) */ | 148 | int fir_pos; /* (0 to 7) */ |
127 | int fir_buf [fir_buf_half * 2] [2]; | 149 | int fir_buf [fir_buf_half * 2] [2]; |
128 | /* copy of echo FIR constants as int, for faster access */ | 150 | /* copy of echo FIR constants as int, for faster access */ |
129 | int fir_coeff [voice_count]; | 151 | int fir_coeff [voice_count]; |
152 | #endif | ||
130 | 153 | ||
131 | struct voice_t voice_state [voice_count]; | 154 | struct voice_t voice_state [voice_count]; |
132 | 155 | ||
@@ -149,7 +172,6 @@ static void DSP_reset( struct Spc_Dsp* this ) | |||
149 | this->echo_pos = 0; | 172 | this->echo_pos = 0; |
150 | this->noise_count = 0; | 173 | this->noise_count = 0; |
151 | this->noise = 2; | 174 | this->noise = 2; |
152 | this->fir_pos = 0; | ||
153 | 175 | ||
154 | this->r.g.flags = 0xE0; /* reset, mute, echo off */ | 176 | this->r.g.flags = 0xE0; /* reset, mute, echo off */ |
155 | this->r.g.key_ons = 0; | 177 | this->r.g.key_ons = 0; |
@@ -169,8 +191,16 @@ static void DSP_reset( struct Spc_Dsp* this ) | |||
169 | for ( i = 0; i < 256; i++ ) | 191 | for ( i = 0; i < 256; i++ ) |
170 | this->wave_entry [i].start_addr = -1; | 192 | this->wave_entry [i].start_addr = -1; |
171 | #endif | 193 | #endif |
172 | 194 | ||
195 | #ifdef CPU_COLDFIRE | ||
196 | this->fir_ptr = fir_buf; | ||
197 | this->last_fir_ptr = &fir_buf [7]; | ||
198 | memset( fir_buf, 0, sizeof fir_buf ); | ||
199 | #else | ||
200 | this->fir_pos = 0; | ||
173 | memset( this->fir_buf, 0, sizeof this->fir_buf ); | 201 | memset( this->fir_buf, 0, sizeof this->fir_buf ); |
202 | #endif | ||
203 | |||
174 | assert( offsetof (struct globals_t,unused9 [2]) == register_count ); | 204 | assert( offsetof (struct globals_t,unused9 [2]) == register_count ); |
175 | assert( sizeof (this->r.voice) == register_count ); | 205 | assert( sizeof (this->r.voice) == register_count ); |
176 | } | 206 | } |
@@ -394,7 +424,7 @@ static void key_on(struct Spc_Dsp* const this, struct voice_t* const voice, | |||
394 | voice->envx = 0; | 424 | voice->envx = 0; |
395 | voice->env_mode = state_attack; | 425 | voice->env_mode = state_attack; |
396 | voice->env_timer = env_rate_init; /* TODO: inaccurate? */ | 426 | voice->env_timer = env_rate_init; /* TODO: inaccurate? */ |
397 | unsigned start_addr = GET_LE16A( sd [raw_voice->waveform].start ); | 427 | unsigned start_addr = GET_LE16A(sd [raw_voice->waveform].start); |
398 | #if !SPC_BRRCACHE | 428 | #if !SPC_BRRCACHE |
399 | { | 429 | { |
400 | voice->addr = RAM + start_addr; | 430 | voice->addr = RAM + start_addr; |
@@ -442,7 +472,7 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | |||
442 | EXIT_TIMER(cpu); | 472 | EXIT_TIMER(cpu); |
443 | ENTER_TIMER(dsp); | 473 | ENTER_TIMER(dsp); |
444 | #endif | 474 | #endif |
445 | 475 | ||
446 | /* Here we check for keys on/off. Docs say that successive writes | 476 | /* Here we check for keys on/off. Docs say that successive writes |
447 | to KON/KOF must be separated by at least 2 Ts periods or risk | 477 | to KON/KOF must be separated by at least 2 Ts periods or risk |
448 | being neglected. Therefore DSP only looks at these during an | 478 | being neglected. Therefore DSP only looks at these during an |
@@ -479,16 +509,42 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | |||
479 | 509 | ||
480 | struct src_dir const* const sd = | 510 | struct src_dir const* const sd = |
481 | (struct src_dir*) &RAM [this->r.g.wave_page * 0x100]; | 511 | (struct src_dir*) &RAM [this->r.g.wave_page * 0x100]; |
512 | |||
513 | #ifdef ROCKBOX_BIG_ENDIAN | ||
514 | /* Convert endiannesses before entering loops - these | ||
515 | get used a lot */ | ||
516 | const uint32_t rates[voice_count] = | ||
517 | { | ||
518 | GET_LE16A( this->r.voice[0].rate ) & 0x3FFF, | ||
519 | GET_LE16A( this->r.voice[1].rate ) & 0x3FFF, | ||
520 | GET_LE16A( this->r.voice[2].rate ) & 0x3FFF, | ||
521 | GET_LE16A( this->r.voice[3].rate ) & 0x3FFF, | ||
522 | GET_LE16A( this->r.voice[4].rate ) & 0x3FFF, | ||
523 | GET_LE16A( this->r.voice[5].rate ) & 0x3FFF, | ||
524 | GET_LE16A( this->r.voice[6].rate ) & 0x3FFF, | ||
525 | GET_LE16A( this->r.voice[7].rate ) & 0x3FFF, | ||
526 | }; | ||
527 | #define VOICE_RATE(x) *(x) | ||
528 | #define IF_RBE(...) __VA_ARGS__ | ||
529 | #ifdef CPU_COLDFIRE | ||
530 | /* Initialize mask register with the buffer address mask */ | ||
531 | asm ("move.l %[m], %%mask" : : [m]"i"(fir_buf_mask)); | ||
532 | const int echo_delay_mask = (this->r.g.echo_delay & 15) * 0x800 - 1; | ||
533 | const int echo_page = this->r.g.echo_page * 0x100; | ||
534 | #endif /* CPU_COLDFIRE */ | ||
535 | #else | ||
536 | #define VOICE_RATE(x) (INT16A(raw_voice->rate) & 0x3FFF) | ||
537 | #define IF_RBE(...) | ||
538 | #endif /* ROCKBOX_BIG_ENDIAN */ | ||
482 | 539 | ||
483 | #if !SPC_NOINTERP | 540 | #if !SPC_NOINTERP |
484 | int const slow_gaussian = (this->r.g.pitch_mods >> 1) | | 541 | int const slow_gaussian = (this->r.g.pitch_mods >> 1) | |
485 | this->r.g.noise_enables; | 542 | this->r.g.noise_enables; |
486 | #endif | 543 | #endif |
487 | /* (g.flags & 0x40) ? 30 : 14 */ | 544 | /* (g.flags & 0x40) ? 30 : 14 */ |
488 | int const global_muting = ((this->r.g.flags & 0x40) >> 2) + 14; | 545 | int const global_muting = ((this->r.g.flags & 0x40) >> 2) + 14 - 8; |
489 | 546 | int const global_vol_0 = this->r.g.volume_0; | |
490 | int const global_vol_0 = this->r.g.volume_0; | 547 | int const global_vol_1 = this->r.g.volume_1; |
491 | int const global_vol_1 = this->r.g.volume_1; | ||
492 | 548 | ||
493 | /* each rate divides exactly into 0x7800 without remainder */ | 549 | /* each rate divides exactly into 0x7800 without remainder */ |
494 | int const env_rate_init = 0x7800; | 550 | int const env_rate_init = 0x7800; |
@@ -525,7 +581,8 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | |||
525 | struct raw_voice_t * raw_voice = this->r.voice; | 581 | struct raw_voice_t * raw_voice = this->r.voice; |
526 | struct voice_t* voice = this->voice_state; | 582 | struct voice_t* voice = this->voice_state; |
527 | int vbit = 1; | 583 | int vbit = 1; |
528 | for ( ; vbit < 0x100; vbit <<= 1, ++voice, ++raw_voice ) | 584 | IF_RBE( const uint32_t* vr = rates; ) |
585 | for ( ; vbit < 0x100; vbit <<= 1, ++voice, ++raw_voice IF_RBE( , ++vr ) ) | ||
529 | { | 586 | { |
530 | /* pregen involves checking keyon, etc */ | 587 | /* pregen involves checking keyon, etc */ |
531 | #if 0 | 588 | #if 0 |
@@ -816,7 +873,7 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | |||
816 | #endif | 873 | #endif |
817 | 874 | ||
818 | /* Get rate (with possible modulation) */ | 875 | /* Get rate (with possible modulation) */ |
819 | int rate = GET_LE16A( raw_voice->rate ) & 0x3FFF; | 876 | int rate = VOICE_RATE(vr); |
820 | if ( this->r.g.pitch_mods & vbit ) | 877 | if ( this->r.g.pitch_mods & vbit ) |
821 | rate = (rate * (prev_outx + 32768)) >> 15; | 878 | rate = (rate * (prev_outx + 32768)) >> 15; |
822 | 879 | ||
@@ -918,19 +975,20 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | |||
918 | { | 975 | { |
919 | uint32_t f = voice->position; | 976 | uint32_t f = voice->position; |
920 | int32_t y1; | 977 | int32_t y1; |
978 | |||
921 | asm ( | 979 | asm ( |
922 | "move.l %[f], %[y0] \n" /* separate fraction */ | 980 | "move.l %[f], %[y0] \r\n" /* separate fraction */ |
923 | "and.l #0xfff, %[f] \n" /* and whole parts */ | 981 | "and.l #0xfff, %[f] \r\n" /* and whole parts */ |
924 | "lsr.l %[sh], %[y0] \n" | 982 | "lsr.l %[sh], %[y0] \r\n" |
925 | "move.l 2(%[s], %[y0].l*2), %[y1] \n" /* load two samples */ | 983 | "move.l 2(%[s], %[y0].l*2), %[y1] \r\n" /* load two samples */ |
926 | "move.l %[y1], %[y0] \n" /* separate samples */ | 984 | "move.l %[y1], %[y0] \r\n" /* separate samples */ |
927 | "ext.l %[y1] \n" /* y0=s[1], y1=s[2] */ | 985 | "ext.l %[y1] \r\n" /* y0=s[1], y1=s[2] */ |
928 | "swap %[y0] \n" | 986 | "swap %[y0] \r\n" |
929 | "ext.l %[y0] \n" | 987 | "ext.l %[y0] \r\n" |
930 | "sub.l %[y0], %[y1] \n" /* diff = y1 - y0 */ | 988 | "sub.l %[y0], %[y1] \r\n" /* diff = y1 - y0 */ |
931 | "muls.l %[f], %[y1] \n" /* y0 += f*diff */ | 989 | "muls.l %[f], %[y1] \r\n" /* y0 += f*diff */ |
932 | "asr.l %[sh], %[y1] \n" | 990 | "asr.l %[sh], %[y1] \r\n" |
933 | "add.l %[y1], %[y0] \n" | 991 | "add.l %[y1], %[y0] \r\n" |
934 | : [f]"+&d"(f), [y0]"=&d"(output), [y1]"=&d"(y1) | 992 | : [f]"+&d"(f), [y0]"=&d"(output), [y1]"=&d"(y1) |
935 | : [s]"a"(voice->samples), [sh]"r"(12) | 993 | : [s]"a"(voice->samples), [sh]"r"(12) |
936 | ); | 994 | ); |
@@ -1020,6 +1078,100 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | |||
1020 | /* end of voice loop */ | 1078 | /* end of voice loop */ |
1021 | 1079 | ||
1022 | #if !SPC_NOECHO | 1080 | #if !SPC_NOECHO |
1081 | #ifdef CPU_COLDFIRE | ||
1082 | /* Read feedback from echo buffer */ | ||
1083 | int echo_pos = this->echo_pos; | ||
1084 | uint8_t* const echo_ptr = RAM + ((echo_page + echo_pos) & 0xFFFF); | ||
1085 | echo_pos = (echo_pos + 4) & echo_delay_mask; | ||
1086 | this->echo_pos = echo_pos; | ||
1087 | int fb = swap_odd_even32(*(int32_t *)echo_ptr); | ||
1088 | int out_0, out_1; | ||
1089 | |||
1090 | /* Keep last 8 samples */ | ||
1091 | *this->last_fir_ptr = fb; | ||
1092 | this->last_fir_ptr = this->fir_ptr; | ||
1093 | |||
1094 | /* Apply echo FIR filter to output - circular buffer is hardware | ||
1095 | incremented and masked; FIR coefficients and buffer history are | ||
1096 | loaded in parallel with multiply accumulate operations. Apply | ||
1097 | scale factor to do hardware clipping later. */ | ||
1098 | int _0, _1, _2; | ||
1099 | asm ( | ||
1100 | "move.l (%[fir_c]) , %[_2] \r\n" | ||
1101 | "mac.w %[fb]u, %[_2]u, <<, (%[fir_p])+&, %[_0], %%acc0 \r\n" | ||
1102 | "mac.w %[fb]l, %[_2]u, <<, (%[fir_p])& , %[_1], %%acc1 \r\n" | ||
1103 | "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n" | ||
1104 | "mac.w %[_0]l, %[_2]l, <<, 4(%[fir_c]) , %[_2], %%acc1 \r\n" | ||
1105 | "mac.w %[_1]u, %[_2]u, <<, 4(%[fir_p])& , %[_0], %%acc0 \r\n" | ||
1106 | "mac.w %[_1]l, %[_2]u, <<, 8(%[fir_p])& , %[_1], %%acc1 \r\n" | ||
1107 | "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n" | ||
1108 | "mac.w %[_0]l, %[_2]l, <<, 8(%[fir_c]) , %[_2], %%acc1 \r\n" | ||
1109 | "mac.w %[_1]u, %[_2]u, <<, 12(%[fir_p])& , %[_0], %%acc0 \r\n" | ||
1110 | "mac.w %[_1]l, %[_2]u, <<, 16(%[fir_p])& , %[_1], %%acc1 \r\n" | ||
1111 | "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n" | ||
1112 | "mac.w %[_0]l, %[_2]l, <<, 12(%[fir_c]) , %[_2], %%acc1 \r\n" | ||
1113 | "mac.w %[_1]u, %[_2]u, <<, 20(%[fir_p])& , %[_0], %%acc0 \r\n" | ||
1114 | "mac.w %[_1]l, %[_2]u, << , %%acc1 \r\n" | ||
1115 | "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n" | ||
1116 | "mac.w %[_0]l, %[_2]l, << , %%acc1 \r\n" | ||
1117 | "movclr.l %%acc0, %[out_0] \r\n" | ||
1118 | "movclr.l %%acc1, %[out_1] \r\n" | ||
1119 | : [_0]"=&r"(_0), [_1]"=&r"(_1), [_2]"=&r"(_2), | ||
1120 | [fir_p]"+a"(this->fir_ptr), | ||
1121 | [out_0]"=r"(out_0), [out_1]"=r"(out_1) | ||
1122 | : [fir_c]"a"(this->fir_coeff), [fb]"r"(fb) | ||
1123 | ); | ||
1124 | |||
1125 | /* Generate output */ | ||
1126 | asm ( | ||
1127 | "mac.l %[chans_0], %[gv_0] , %%acc2 \r\n" | ||
1128 | "mac.l %[chans_1], %[gv_1] , %%acc3 \r\n" | ||
1129 | "mac.l %[ev_0], %[out_0], >>, %%acc2 \r\n" | ||
1130 | "mac.l %[ev_1], %[out_1], >>, %%acc3 \r\n" | ||
1131 | : | ||
1132 | : [chans_0]"r"(chans_0), [gv_0]"r"(global_vol_0), | ||
1133 | [ev_0]"r"((int)this->r.g.echo_volume_0), | ||
1134 | [chans_1]"r"(chans_1), [gv_1]"r"(global_vol_1), | ||
1135 | [ev_1]"r"((int)this->r.g.echo_volume_1), | ||
1136 | [out_0]"r"(out_0), [out_1]"r"(out_1) | ||
1137 | ); | ||
1138 | |||
1139 | /* Feedback into echo buffer */ | ||
1140 | if ( !(this->r.g.flags & 0x20) ) | ||
1141 | { | ||
1142 | asm ( | ||
1143 | "lsl.l %[sh], %[e0] \r\n" | ||
1144 | "move.l %[e0], %%acc0 \r\n" | ||
1145 | "mac.l %[out_0], %[ef], <<, %%acc0 \r\n" | ||
1146 | "lsl.l %[sh], %[e1] \r\n" | ||
1147 | "move.l %[e1], %%acc1 \r\n" | ||
1148 | "mac.l %[out_1], %[ef], <<, %%acc1 \r\n" | ||
1149 | "movclr.l %%acc0, %[e0] \r\n" | ||
1150 | "movclr.l %%acc1, %[e1] \r\n" | ||
1151 | "swap %[e1] \r\n" | ||
1152 | "move.w %[e1], %[e0] \r\n" | ||
1153 | : [e0]"+&d"(echo_0), [e1]"+&d"(echo_1) | ||
1154 | : [out_0]"r"(out_0), [out_1]"r"(out_1), | ||
1155 | [ef]"r"((int)this->r.g.echo_feedback), | ||
1156 | [sh]"d"(9) | ||
1157 | ); | ||
1158 | *(int32_t *)echo_ptr = swap_odd_even32(echo_0); | ||
1159 | } | ||
1160 | |||
1161 | /* Output final samples */ | ||
1162 | asm ( | ||
1163 | "movclr.l %%acc2, %[out_0] \r\n" | ||
1164 | "movclr.l %%acc3, %[out_1] \r\n" | ||
1165 | "asr.l %[gm], %[out_0] \r\n" | ||
1166 | "asr.l %[gm], %[out_1] \r\n" | ||
1167 | : [out_0]"=&d"(out_0), [out_1]"=&d"(out_1) | ||
1168 | : [gm]"d"(global_muting) | ||
1169 | ); | ||
1170 | |||
1171 | out_buf [ 0] = out_0; | ||
1172 | out_buf [WAV_CHUNK_SIZE] = out_1; | ||
1173 | out_buf ++; | ||
1174 | #else /* !CPU_COLDFIRE */ | ||
1023 | /* Read feedback from echo buffer */ | 1175 | /* Read feedback from echo buffer */ |
1024 | int echo_pos = this->echo_pos; | 1176 | int echo_pos = this->echo_pos; |
1025 | uint8_t* const echo_ptr = RAM + | 1177 | uint8_t* const echo_ptr = RAM + |
@@ -1061,10 +1213,8 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | |||
1061 | >> global_muting; | 1213 | >> global_muting; |
1062 | int amp_1 = (chans_1 * global_vol_1 + fb_1 * this->r.g.echo_volume_1) | 1214 | int amp_1 = (chans_1 * global_vol_1 + fb_1 * this->r.g.echo_volume_1) |
1063 | >> global_muting; | 1215 | >> global_muting; |
1064 | CLAMP16( amp_0, amp_0 ); | 1216 | out_buf [ 0] = amp_0; |
1065 | out_buf [0] = amp_0 * (1 << 8); | 1217 | out_buf [WAV_CHUNK_SIZE] = amp_1; |
1066 | CLAMP16( amp_1, amp_1 ); | ||
1067 | out_buf [WAV_CHUNK_SIZE] = amp_1 * (1 << 8); | ||
1068 | out_buf ++; | 1218 | out_buf ++; |
1069 | 1219 | ||
1070 | /* Feedback into echo buffer */ | 1220 | /* Feedback into echo buffer */ |
@@ -1077,14 +1227,13 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | |||
1077 | CLAMP16( e1, e1 ); | 1227 | CLAMP16( e1, e1 ); |
1078 | SET_LE16A( echo_ptr + 2, e1 ); | 1228 | SET_LE16A( echo_ptr + 2, e1 ); |
1079 | } | 1229 | } |
1230 | #endif /* CPU_COLDFIRE */ | ||
1080 | #else | 1231 | #else |
1081 | /* Generate output */ | 1232 | /* Generate output */ |
1082 | int amp_0 = (chans_0 * global_vol_0) >> global_muting; | 1233 | int amp_0 = (chans_0 * global_vol_0) >> global_muting; |
1083 | int amp_1 = (chans_1 * global_vol_1) >> global_muting; | 1234 | int amp_1 = (chans_1 * global_vol_1) >> global_muting; |
1084 | CLAMP16( amp_0, amp_0 ); | 1235 | out_buf [ 0] = amp_0; |
1085 | out_buf [0] = amp_0 * (1 << 8); | 1236 | out_buf [WAV_CHUNK_SIZE] = amp_1; |
1086 | CLAMP16( amp_1, amp_1 ); | ||
1087 | out_buf [WAV_CHUNK_SIZE] = amp_1 * (1 << 8); | ||
1088 | out_buf ++; | 1237 | out_buf ++; |
1089 | #endif | 1238 | #endif |
1090 | } | 1239 | } |