summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- apps/codecs/spc.c 11
-rw-r--r-- apps/codecs/spc/Spc_Dsp.h 211
2 files changed, 191 insertions, 31 deletions
diff --git a/apps/codecs/spc.c b/apps/codecs/spc.c
index 86b9c0caf1..87b5972087 100644
--- a/apps/codecs/spc.c
+++ b/apps/codecs/spc.c
@@ -51,9 +51,14 @@ CODEC_HEADER
51 /* Disable gaussian interpolation */ 51 /* Disable gaussian interpolation */
52 #define SPC_NOINTERP 1 52 #define SPC_NOINTERP 1
53 53
54#ifndef CPU_COLDFIRE
54 /* Disable echo processing */ 55 /* Disable echo processing */
55 #define SPC_NOECHO 1 56 #define SPC_NOECHO 1
56#else 57#else
58 /* Enable echo processing */
59 #define SPC_NOECHO 0
60#endif
61#else
57 /* Don't cache BRR waves */ 62 /* Don't cache BRR waves */
58 #define SPC_BRRCACHE 0 63 #define SPC_BRRCACHE 0
59 64
@@ -100,6 +105,8 @@ static inline void set_le16( void* p, unsigned n )
100 105
101#define GET_LE16( addr ) get_le16( addr ) 106#define GET_LE16( addr ) get_le16( addr )
102#define SET_LE16( addr, data ) set_le16( addr, data ) 107#define SET_LE16( addr, data ) set_le16( addr, data )
108#define INT16A( addr ) (*(uint16_t*) (addr))
109#define INT16SA( addr ) (*(int16_t*) (addr))
103 110
104#ifdef ROCKBOX_LITTLE_ENDIAN 111#ifdef ROCKBOX_LITTLE_ENDIAN
105 #define GET_LE16A( addr ) (*(uint16_t*) (addr)) 112 #define GET_LE16A( addr ) (*(uint16_t*) (addr))
@@ -794,6 +801,10 @@ enum codec_status codec_main(void)
794{ 801{
795 memcpy( spc_emu.cycle_table, cycle_table, sizeof cycle_table ); 802 memcpy( spc_emu.cycle_table, cycle_table, sizeof cycle_table );
796 803
804#ifdef CPU_COLDFIRE
805 coldfire_set_macsr(EMAC_SATURATE);
806#endif
807
797 do 808 do
798 { 809 {
799 DEBUGF("SPC: next_track\n"); 810 DEBUGF("SPC: next_track\n");
diff --git a/apps/codecs/spc/Spc_Dsp.h b/apps/codecs/spc/Spc_Dsp.h
index 0cf55dee8a..4d64b2420c 100644
--- a/apps/codecs/spc/Spc_Dsp.h
+++ b/apps/codecs/spc/Spc_Dsp.h
@@ -107,6 +107,19 @@ static int16_t BRRcache [0x20000 + 32];
107 107
108enum { fir_buf_half = 8 }; 108enum { fir_buf_half = 8 };
109 109
110#ifdef CPU_COLDFIRE
111/* global because of the large alignment requirement for hardware masking -
112 * L-R interleaved 16-bit samples for easy loading and mac.w use.
113 */
114enum
115{
116 fir_buf_size = fir_buf_half * sizeof ( int32_t ),
117 fir_buf_mask = ~fir_buf_size
118};
119int32_t fir_buf[fir_buf_half]
120 __attribute__ ((aligned (fir_buf_size*2))) IBSS_ATTR;
121#endif /* CPU_COLDFIRE */
122
110struct Spc_Dsp 123struct Spc_Dsp
111{ 124{
112 union 125 union
@@ -122,11 +135,21 @@ struct Spc_Dsp
122 int noise_count; 135 int noise_count;
123 uint16_t noise; /* also read as int16_t */ 136 uint16_t noise; /* also read as int16_t */
124 137
138#ifdef CPU_COLDFIRE
139 /* circularly hardware masked address */
140 int32_t *fir_ptr;
141 /* wrapped address just behind current position -
142 allows mac.w to increment and mask fir_ptr */
143 int32_t *last_fir_ptr;
144 /* copy of echo FIR constants as int16_t for use with mac.w */
145 int16_t fir_coeff[voice_count];
146#else
125 /* fir_buf [i + 8] == fir_buf [i], to avoid wrap checking in FIR code */ 147 /* fir_buf [i + 8] == fir_buf [i], to avoid wrap checking in FIR code */
126 int fir_pos; /* (0 to 7) */ 148 int fir_pos; /* (0 to 7) */
127 int fir_buf [fir_buf_half * 2] [2]; 149 int fir_buf [fir_buf_half * 2] [2];
128 /* copy of echo FIR constants as int, for faster access */ 150 /* copy of echo FIR constants as int, for faster access */
129 int fir_coeff [voice_count]; 151 int fir_coeff [voice_count];
152#endif
130 153
131 struct voice_t voice_state [voice_count]; 154 struct voice_t voice_state [voice_count];
132 155
@@ -149,7 +172,6 @@ static void DSP_reset( struct Spc_Dsp* this )
149 this->echo_pos = 0; 172 this->echo_pos = 0;
150 this->noise_count = 0; 173 this->noise_count = 0;
151 this->noise = 2; 174 this->noise = 2;
152 this->fir_pos = 0;
153 175
154 this->r.g.flags = 0xE0; /* reset, mute, echo off */ 176 this->r.g.flags = 0xE0; /* reset, mute, echo off */
155 this->r.g.key_ons = 0; 177 this->r.g.key_ons = 0;
@@ -169,8 +191,16 @@ static void DSP_reset( struct Spc_Dsp* this )
169 for ( i = 0; i < 256; i++ ) 191 for ( i = 0; i < 256; i++ )
170 this->wave_entry [i].start_addr = -1; 192 this->wave_entry [i].start_addr = -1;
171 #endif 193 #endif
172 194
195#ifdef CPU_COLDFIRE
196 this->fir_ptr = fir_buf;
197 this->last_fir_ptr = &fir_buf [7];
198 memset( fir_buf, 0, sizeof fir_buf );
199#else
200 this->fir_pos = 0;
173 memset( this->fir_buf, 0, sizeof this->fir_buf ); 201 memset( this->fir_buf, 0, sizeof this->fir_buf );
202#endif
203
174 assert( offsetof (struct globals_t,unused9 [2]) == register_count ); 204 assert( offsetof (struct globals_t,unused9 [2]) == register_count );
175 assert( sizeof (this->r.voice) == register_count ); 205 assert( sizeof (this->r.voice) == register_count );
176} 206}
@@ -394,7 +424,7 @@ static void key_on(struct Spc_Dsp* const this, struct voice_t* const voice,
394 voice->envx = 0; 424 voice->envx = 0;
395 voice->env_mode = state_attack; 425 voice->env_mode = state_attack;
396 voice->env_timer = env_rate_init; /* TODO: inaccurate? */ 426 voice->env_timer = env_rate_init; /* TODO: inaccurate? */
397 unsigned start_addr = GET_LE16A( sd [raw_voice->waveform].start ); 427 unsigned start_addr = GET_LE16A(sd [raw_voice->waveform].start);
398 #if !SPC_BRRCACHE 428 #if !SPC_BRRCACHE
399 { 429 {
400 voice->addr = RAM + start_addr; 430 voice->addr = RAM + start_addr;
@@ -442,7 +472,7 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
442 EXIT_TIMER(cpu); 472 EXIT_TIMER(cpu);
443 ENTER_TIMER(dsp); 473 ENTER_TIMER(dsp);
444#endif 474#endif
445 475
446 /* Here we check for keys on/off. Docs say that successive writes 476 /* Here we check for keys on/off. Docs say that successive writes
447 to KON/KOF must be separated by at least 2 Ts periods or risk 477 to KON/KOF must be separated by at least 2 Ts periods or risk
448 being neglected. Therefore DSP only looks at these during an 478 being neglected. Therefore DSP only looks at these during an
@@ -479,16 +509,42 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
479 509
480 struct src_dir const* const sd = 510 struct src_dir const* const sd =
481 (struct src_dir*) &RAM [this->r.g.wave_page * 0x100]; 511 (struct src_dir*) &RAM [this->r.g.wave_page * 0x100];
512
513 #ifdef ROCKBOX_BIG_ENDIAN
514 /* Convert endiannesses before entering loops - these
515 get used a lot */
516 const uint32_t rates[voice_count] =
517 {
518 GET_LE16A( this->r.voice[0].rate ) & 0x3FFF,
519 GET_LE16A( this->r.voice[1].rate ) & 0x3FFF,
520 GET_LE16A( this->r.voice[2].rate ) & 0x3FFF,
521 GET_LE16A( this->r.voice[3].rate ) & 0x3FFF,
522 GET_LE16A( this->r.voice[4].rate ) & 0x3FFF,
523 GET_LE16A( this->r.voice[5].rate ) & 0x3FFF,
524 GET_LE16A( this->r.voice[6].rate ) & 0x3FFF,
525 GET_LE16A( this->r.voice[7].rate ) & 0x3FFF,
526 };
527 #define VOICE_RATE(x) *(x)
528 #define IF_RBE(...) __VA_ARGS__
529 #ifdef CPU_COLDFIRE
530 /* Initialize mask register with the buffer address mask */
531 asm ("move.l %[m], %%mask" : : [m]"i"(fir_buf_mask));
532 const int echo_delay_mask = (this->r.g.echo_delay & 15) * 0x800 - 1;
533 const int echo_page = this->r.g.echo_page * 0x100;
534 #endif /* CPU_COLDFIRE */
535 #else
536 #define VOICE_RATE(x) (INT16A(raw_voice->rate) & 0x3FFF)
537 #define IF_RBE(...)
538 #endif /* ROCKBOX_BIG_ENDIAN */
482 539
483#if !SPC_NOINTERP 540#if !SPC_NOINTERP
484 int const slow_gaussian = (this->r.g.pitch_mods >> 1) | 541 int const slow_gaussian = (this->r.g.pitch_mods >> 1) |
485 this->r.g.noise_enables; 542 this->r.g.noise_enables;
486#endif 543#endif
487 /* (g.flags & 0x40) ? 30 : 14 */ 544 /* (g.flags & 0x40) ? 30 : 14 */
488 int const global_muting = ((this->r.g.flags & 0x40) >> 2) + 14; 545 int const global_muting = ((this->r.g.flags & 0x40) >> 2) + 14 - 8;
489 546 int const global_vol_0 = this->r.g.volume_0;
490 int const global_vol_0 = this->r.g.volume_0; 547 int const global_vol_1 = this->r.g.volume_1;
491 int const global_vol_1 = this->r.g.volume_1;
492 548
493 /* each rate divides exactly into 0x7800 without remainder */ 549 /* each rate divides exactly into 0x7800 without remainder */
494 int const env_rate_init = 0x7800; 550 int const env_rate_init = 0x7800;
@@ -525,7 +581,8 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
525 struct raw_voice_t * raw_voice = this->r.voice; 581 struct raw_voice_t * raw_voice = this->r.voice;
526 struct voice_t* voice = this->voice_state; 582 struct voice_t* voice = this->voice_state;
527 int vbit = 1; 583 int vbit = 1;
528 for ( ; vbit < 0x100; vbit <<= 1, ++voice, ++raw_voice ) 584 IF_RBE( const uint32_t* vr = rates; )
585 for ( ; vbit < 0x100; vbit <<= 1, ++voice, ++raw_voice IF_RBE( , ++vr ) )
529 { 586 {
530 /* pregen involves checking keyon, etc */ 587 /* pregen involves checking keyon, etc */
531#if 0 588#if 0
@@ -816,7 +873,7 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
816 #endif 873 #endif
817 874
818 /* Get rate (with possible modulation) */ 875 /* Get rate (with possible modulation) */
819 int rate = GET_LE16A( raw_voice->rate ) & 0x3FFF; 876 int rate = VOICE_RATE(vr);
820 if ( this->r.g.pitch_mods & vbit ) 877 if ( this->r.g.pitch_mods & vbit )
821 rate = (rate * (prev_outx + 32768)) >> 15; 878 rate = (rate * (prev_outx + 32768)) >> 15;
822 879
@@ -918,19 +975,20 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
918 { 975 {
919 uint32_t f = voice->position; 976 uint32_t f = voice->position;
920 int32_t y1; 977 int32_t y1;
978
921 asm ( 979 asm (
922 "move.l %[f], %[y0] \n" /* separate fraction */ 980 "move.l %[f], %[y0] \r\n" /* separate fraction */
923 "and.l #0xfff, %[f] \n" /* and whole parts */ 981 "and.l #0xfff, %[f] \r\n" /* and whole parts */
924 "lsr.l %[sh], %[y0] \n" 982 "lsr.l %[sh], %[y0] \r\n"
925 "move.l 2(%[s], %[y0].l*2), %[y1] \n" /* load two samples */ 983 "move.l 2(%[s], %[y0].l*2), %[y1] \r\n" /* load two samples */
926 "move.l %[y1], %[y0] \n" /* separate samples */ 984 "move.l %[y1], %[y0] \r\n" /* separate samples */
927 "ext.l %[y1] \n" /* y0=s[1], y1=s[2] */ 985 "ext.l %[y1] \r\n" /* y0=s[1], y1=s[2] */
928 "swap %[y0] \n" 986 "swap %[y0] \r\n"
929 "ext.l %[y0] \n" 987 "ext.l %[y0] \r\n"
930 "sub.l %[y0], %[y1] \n" /* diff = y1 - y0 */ 988 "sub.l %[y0], %[y1] \r\n" /* diff = y1 - y0 */
931 "muls.l %[f], %[y1] \n" /* y0 += f*diff */ 989 "muls.l %[f], %[y1] \r\n" /* y0 += f*diff */
932 "asr.l %[sh], %[y1] \n" 990 "asr.l %[sh], %[y1] \r\n"
933 "add.l %[y1], %[y0] \n" 991 "add.l %[y1], %[y0] \r\n"
934 : [f]"+&d"(f), [y0]"=&d"(output), [y1]"=&d"(y1) 992 : [f]"+&d"(f), [y0]"=&d"(output), [y1]"=&d"(y1)
935 : [s]"a"(voice->samples), [sh]"r"(12) 993 : [s]"a"(voice->samples), [sh]"r"(12)
936 ); 994 );
@@ -1020,6 +1078,100 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
1020 /* end of voice loop */ 1078 /* end of voice loop */
1021 1079
1022 #if !SPC_NOECHO 1080 #if !SPC_NOECHO
1081 #ifdef CPU_COLDFIRE
1082 /* Read feedback from echo buffer */
1083 int echo_pos = this->echo_pos;
1084 uint8_t* const echo_ptr = RAM + ((echo_page + echo_pos) & 0xFFFF);
1085 echo_pos = (echo_pos + 4) & echo_delay_mask;
1086 this->echo_pos = echo_pos;
1087 int fb = swap_odd_even32(*(int32_t *)echo_ptr);
1088 int out_0, out_1;
1089
1090 /* Keep last 8 samples */
1091 *this->last_fir_ptr = fb;
1092 this->last_fir_ptr = this->fir_ptr;
1093
1094 /* Apply echo FIR filter to output - circular buffer is hardware
1095 incremented and masked; FIR coefficients and buffer history are
1096 loaded in parallel with multiply accumulate operations. Apply
1097 scale factor to do hardware clipping later. */
1098 int _0, _1, _2;
1099 asm (
1100 "move.l (%[fir_c]) , %[_2] \r\n"
1101 "mac.w %[fb]u, %[_2]u, <<, (%[fir_p])+&, %[_0], %%acc0 \r\n"
1102 "mac.w %[fb]l, %[_2]u, <<, (%[fir_p])& , %[_1], %%acc1 \r\n"
1103 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1104 "mac.w %[_0]l, %[_2]l, <<, 4(%[fir_c]) , %[_2], %%acc1 \r\n"
1105 "mac.w %[_1]u, %[_2]u, <<, 4(%[fir_p])& , %[_0], %%acc0 \r\n"
1106 "mac.w %[_1]l, %[_2]u, <<, 8(%[fir_p])& , %[_1], %%acc1 \r\n"
1107 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1108 "mac.w %[_0]l, %[_2]l, <<, 8(%[fir_c]) , %[_2], %%acc1 \r\n"
1109 "mac.w %[_1]u, %[_2]u, <<, 12(%[fir_p])& , %[_0], %%acc0 \r\n"
1110 "mac.w %[_1]l, %[_2]u, <<, 16(%[fir_p])& , %[_1], %%acc1 \r\n"
1111 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1112 "mac.w %[_0]l, %[_2]l, <<, 12(%[fir_c]) , %[_2], %%acc1 \r\n"
1113 "mac.w %[_1]u, %[_2]u, <<, 20(%[fir_p])& , %[_0], %%acc0 \r\n"
1114 "mac.w %[_1]l, %[_2]u, << , %%acc1 \r\n"
1115 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1116 "mac.w %[_0]l, %[_2]l, << , %%acc1 \r\n"
1117 "movclr.l %%acc0, %[out_0] \r\n"
1118 "movclr.l %%acc1, %[out_1] \r\n"
1119 : [_0]"=&r"(_0), [_1]"=&r"(_1), [_2]"=&r"(_2),
1120 [fir_p]"+a"(this->fir_ptr),
1121 [out_0]"=r"(out_0), [out_1]"=r"(out_1)
1122 : [fir_c]"a"(this->fir_coeff), [fb]"r"(fb)
1123 );
1124
1125 /* Generate output */
1126 asm (
1127 "mac.l %[chans_0], %[gv_0] , %%acc2 \r\n"
1128 "mac.l %[chans_1], %[gv_1] , %%acc3 \r\n"
1129 "mac.l %[ev_0], %[out_0], >>, %%acc2 \r\n"
1130 "mac.l %[ev_1], %[out_1], >>, %%acc3 \r\n"
1131 :
1132 : [chans_0]"r"(chans_0), [gv_0]"r"(global_vol_0),
1133 [ev_0]"r"((int)this->r.g.echo_volume_0),
1134 [chans_1]"r"(chans_1), [gv_1]"r"(global_vol_1),
1135 [ev_1]"r"((int)this->r.g.echo_volume_1),
1136 [out_0]"r"(out_0), [out_1]"r"(out_1)
1137 );
1138
1139 /* Feedback into echo buffer */
1140 if ( !(this->r.g.flags & 0x20) )
1141 {
1142 asm (
1143 "lsl.l %[sh], %[e0] \r\n"
1144 "move.l %[e0], %%acc0 \r\n"
1145 "mac.l %[out_0], %[ef], <<, %%acc0 \r\n"
1146 "lsl.l %[sh], %[e1] \r\n"
1147 "move.l %[e1], %%acc1 \r\n"
1148 "mac.l %[out_1], %[ef], <<, %%acc1 \r\n"
1149 "movclr.l %%acc0, %[e0] \r\n"
1150 "movclr.l %%acc1, %[e1] \r\n"
1151 "swap %[e1] \r\n"
1152 "move.w %[e1], %[e0] \r\n"
1153 : [e0]"+&d"(echo_0), [e1]"+&d"(echo_1)
1154 : [out_0]"r"(out_0), [out_1]"r"(out_1),
1155 [ef]"r"((int)this->r.g.echo_feedback),
1156 [sh]"d"(9)
1157 );
1158 *(int32_t *)echo_ptr = swap_odd_even32(echo_0);
1159 }
1160
1161 /* Output final samples */
1162 asm (
1163 "movclr.l %%acc2, %[out_0] \r\n"
1164 "movclr.l %%acc3, %[out_1] \r\n"
1165 "asr.l %[gm], %[out_0] \r\n"
1166 "asr.l %[gm], %[out_1] \r\n"
1167 : [out_0]"=&d"(out_0), [out_1]"=&d"(out_1)
1168 : [gm]"d"(global_muting)
1169 );
1170
1171 out_buf [ 0] = out_0;
1172 out_buf [WAV_CHUNK_SIZE] = out_1;
1173 out_buf ++;
1174 #else /* !CPU_COLDFIRE */
1023 /* Read feedback from echo buffer */ 1175 /* Read feedback from echo buffer */
1024 int echo_pos = this->echo_pos; 1176 int echo_pos = this->echo_pos;
1025 uint8_t* const echo_ptr = RAM + 1177 uint8_t* const echo_ptr = RAM +
@@ -1061,10 +1213,8 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
1061 >> global_muting; 1213 >> global_muting;
1062 int amp_1 = (chans_1 * global_vol_1 + fb_1 * this->r.g.echo_volume_1) 1214 int amp_1 = (chans_1 * global_vol_1 + fb_1 * this->r.g.echo_volume_1)
1063 >> global_muting; 1215 >> global_muting;
1064 CLAMP16( amp_0, amp_0 ); 1216 out_buf [ 0] = amp_0;
1065 out_buf [0] = amp_0 * (1 << 8); 1217 out_buf [WAV_CHUNK_SIZE] = amp_1;
1066 CLAMP16( amp_1, amp_1 );
1067 out_buf [WAV_CHUNK_SIZE] = amp_1 * (1 << 8);
1068 out_buf ++; 1218 out_buf ++;
1069 1219
1070 /* Feedback into echo buffer */ 1220 /* Feedback into echo buffer */
@@ -1077,14 +1227,13 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
1077 CLAMP16( e1, e1 ); 1227 CLAMP16( e1, e1 );
1078 SET_LE16A( echo_ptr + 2, e1 ); 1228 SET_LE16A( echo_ptr + 2, e1 );
1079 } 1229 }
1230 #endif /* CPU_COLDFIRE */
1080 #else 1231 #else
1081 /* Generate output */ 1232 /* Generate output */
1082 int amp_0 = (chans_0 * global_vol_0) >> global_muting; 1233 int amp_0 = (chans_0 * global_vol_0) >> global_muting;
1083 int amp_1 = (chans_1 * global_vol_1) >> global_muting; 1234 int amp_1 = (chans_1 * global_vol_1) >> global_muting;
1084 CLAMP16( amp_0, amp_0 ); 1235 out_buf [ 0] = amp_0;
1085 out_buf [0] = amp_0 * (1 << 8); 1236 out_buf [WAV_CHUNK_SIZE] = amp_1;
1086 CLAMP16( amp_1, amp_1 );
1087 out_buf [WAV_CHUNK_SIZE] = amp_1 * (1 << 8);
1088 out_buf ++; 1237 out_buf ++;
1089 #endif 1238 #endif
1090 } 1239 }