summaryrefslogtreecommitdiff
path: root/apps/codecs/spc
diff options
context:
space:
mode:
Diffstat (limited to 'apps/codecs/spc')
-rw-r--r--apps/codecs/spc/Spc_Dsp.h93
1 files changed, 62 insertions, 31 deletions
diff --git a/apps/codecs/spc/Spc_Dsp.h b/apps/codecs/spc/Spc_Dsp.h
index 6b530a7a62..fdcd37f226 100644
--- a/apps/codecs/spc/Spc_Dsp.h
+++ b/apps/codecs/spc/Spc_Dsp.h
@@ -974,23 +974,35 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
974 if ( (this->r.g.noise_enables & vbit) == 0 ) 974 if ( (this->r.g.noise_enables & vbit) == 0 )
975 { 975 {
976 uint32_t f = voice->position; 976 uint32_t f = voice->position;
977 int32_t y1; 977 int32_t y0;
978 978
979 /**
980 * Formula (fastest found so far of MANY):
981 * output = y0 + ((f*y1 - f*y0) >> 12)
982 */
979 asm volatile ( 983 asm volatile (
980 "move.l %[f], %[y0] \r\n" /* separate fraction */ 984 /* separate fractional and whole parts */
981 "and.l #0xfff, %[f] \r\n" /* and whole parts */ 985 "move.l %[f], %[y1] \r\n"
982 "lsr.l %[sh], %[y0] \r\n" 986 "and.l #0xfff, %[f] \r\n"
983 "move.l 2(%[s], %[y0].l*2), %[y1] \r\n" /* load two samples */ 987 "lsr.l %[sh], %[y1] \r\n"
984 "move.l %[y1], %[y0] \r\n" /* separate samples */ 988 /* load samples y0 (upper) & y1 (lower) */
985 "ext.l %[y1] \r\n" /* y0=s[1], y1=s[2] */ 989 "move.l 2(%[s], %[y1].l*2), %[y1] \r\n"
986 "swap %[y0] \r\n" 990 /* %acc0 = f*y1 */
987 "ext.l %[y0] \r\n" 991 "mac.w %[f]l, %[y1]l, %%acc0 \r\n"
988 "sub.l %[y0], %[y1] \r\n" /* diff = y1 - y0 */ 992 /* msac.w is 2% boostier (costs more CPU), so negate f and add with mac.w instead */
989 "muls.l %[f], %[y1] \r\n" /* y0 += f*diff */ 993 "neg.l %[f] \r\n"
990 "asr.l %[sh], %[y1] \r\n" 994 /* %acc0 -= f*y0 */
991 "add.l %[y1], %[y0] \r\n" 995 "mac.w %[f]l, %[y1]u, %%acc0 \r\n"
992 : [f]"+&d"(f), [y0]"=&d"(output), [y1]"=&d"(y1) 996 /* separate out y0 and sign extend */
993 : [s]"a"(voice->samples), [sh]"r"(12) 997 "swap %[y1] \r\n"
998 "movea.w %[y1], %[y0] \r\n"
999 /* fetch result, scale down and add y0 */
1000 "movclr.l %%acc0, %[y1] \r\n"
1001 /* output = y0 + (result >> 12) */
1002 "asr.l %[sh], %[y1] \r\n"
1003 "add.l %[y0], %[y1] \r\n"
1004 : [f]"+&d"(f), [y0]"=&a"(y0), [y1]"=&d"(output)
1005 : [s]"a"(voice->samples), [sh]"d"(12)
994 ); 1006 );
995 } 1007 }
996 1008
@@ -1093,9 +1105,13 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
1093 *this->last_fir_ptr = fb; 1105 *this->last_fir_ptr = fb;
1094 this->last_fir_ptr = this->fir_ptr; 1106 this->last_fir_ptr = this->fir_ptr;
1095 1107
1096 /* Apply echo FIR filter to output - circular buffer is hardware 1108 /* Apply echo FIR filter to output samples read from echo buffer -
1097 incremented and masked; FIR coefficients and buffer history are 1109 circular buffer is hardware incremented and masked; FIR
1098 loaded in parallel with multiply accumulate operations. */ 1110 coefficients and buffer history are loaded in parallel with
1111 multiply accumulate operations. Shift left by one here and once
1112 again when calculating feedback to have sample values justified
1113 to bit 31 in the output to ease endian swap, interleaving and
1114 clamping before placing result in the program's echo buffer. */
1099 int _0, _1, _2; 1115 int _0, _1, _2;
1100 asm volatile ( 1116 asm volatile (
1101 "move.l (%[fir_c]) , %[_2] \r\n" 1117 "move.l (%[fir_c]) , %[_2] \r\n"
@@ -1115,53 +1131,68 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
1115 "mac.w %[_1]l, %[_2]u, << , %%acc1 \r\n" 1131 "mac.w %[_1]l, %[_2]u, << , %%acc1 \r\n"
1116 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n" 1132 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1117 "mac.w %[_0]l, %[_2]l, << , %%acc1 \r\n" 1133 "mac.w %[_0]l, %[_2]l, << , %%acc1 \r\n"
1118 "movclr.l %%acc0, %[out_0] \r\n"
1119 "movclr.l %%acc1, %[out_1] \r\n"
1120 : [_0]"=&r"(_0), [_1]"=&r"(_1), [_2]"=&r"(_2), 1134 : [_0]"=&r"(_0), [_1]"=&r"(_1), [_2]"=&r"(_2),
1121 [fir_p]"+a"(this->fir_ptr), 1135 [fir_p]"+a"(this->fir_ptr)
1122 [out_0]"=r"(out_0), [out_1]"=r"(out_1)
1123 : [fir_c]"a"(this->fir_coeff), [fb]"r"(fb) 1136 : [fir_c]"a"(this->fir_coeff), [fb]"r"(fb)
1124 ); 1137 );
1125 1138
1126 /* Generate output */ 1139 /* Generate output */
1127 asm volatile ( 1140 asm volatile (
1141 /* fetch filter results to eliminate stalls */
1142 "movclr.l %%acc0, %[out_0] \r\n"
1143 "movclr.l %%acc1, %[out_1] \r\n"
1144 /* apply global volume */
1128 "mac.l %[chans_0], %[gv_0] , %%acc2 \r\n" 1145 "mac.l %[chans_0], %[gv_0] , %%acc2 \r\n"
1129 "mac.l %[chans_1], %[gv_1] , %%acc3 \r\n" 1146 "mac.l %[chans_1], %[gv_1] , %%acc3 \r\n"
1147 /* apply echo volume and add to final output */
1130 "mac.l %[ev_0], %[out_0], >>, %%acc2 \r\n" 1148 "mac.l %[ev_0], %[out_0], >>, %%acc2 \r\n"
1131 "mac.l %[ev_1], %[out_1], >>, %%acc3 \r\n" 1149 "mac.l %[ev_1], %[out_1], >>, %%acc3 \r\n"
1132 : 1150 : [out_0]"=&r"(out_0), [out_1]"=&r"(out_1)
1133 : [chans_0]"r"(chans_0), [gv_0]"r"(global_vol_0), 1151 : [chans_0]"r"(chans_0), [gv_0]"r"(global_vol_0),
1134 [ev_0]"r"((int)this->r.g.echo_volume_0), 1152 [ev_0]"r"((int)this->r.g.echo_volume_0),
1135 [chans_1]"r"(chans_1), [gv_1]"r"(global_vol_1), 1153 [chans_1]"r"(chans_1), [gv_1]"r"(global_vol_1),
1136 [ev_1]"r"((int)this->r.g.echo_volume_1), 1154 [ev_1]"r"((int)this->r.g.echo_volume_1)
1137 [out_0]"r"(out_0), [out_1]"r"(out_1)
1138 ); 1155 );
1139 1156
1140 /* Feedback into echo buffer */ 1157 /* Feedback into echo buffer */
1141 if ( !(this->r.g.flags & 0x20) ) 1158 if ( !(this->r.g.flags & 0x20) )
1142 { 1159 {
1143 asm volatile ( 1160 asm volatile (
1144 "mac.l %[sh], %[e0] , %%acc0 \r\n" 1161 /* scale echo voices; saturate if overflow */
1145 "mac.l %[out_0], %[ef], <<, %%acc0 \r\n"
1146 "mac.l %[sh], %[e1] , %%acc1 \r\n" 1162 "mac.l %[sh], %[e1] , %%acc1 \r\n"
1163 "mac.l %[sh], %[e0] , %%acc0 \r\n"
1164 /* add scaled output from FIR filter */
1147 "mac.l %[out_1], %[ef], <<, %%acc1 \r\n" 1165 "mac.l %[out_1], %[ef], <<, %%acc1 \r\n"
1148 "movclr.l %%acc0, %[e0] \r\n" 1166 "mac.l %[out_0], %[ef], <<, %%acc0 \r\n"
1167 /* byte-swap and fetch feedback results - this is
1168 simply swap_odd_even32 interleaved between the macs
1169 and movclrs to mitigate stall issues */
1170 "move.l #0x00ff00ff, %[sh] \r\n"
1149 "movclr.l %%acc1, %[e1] \r\n" 1171 "movclr.l %%acc1, %[e1] \r\n"
1150 "swap %[e1] \r\n" 1172 "swap %[e1] \r\n"
1173 "movclr.l %%acc0, %[e0] \r\n"
1151 "move.w %[e1], %[e0] \r\n" 1174 "move.w %[e1], %[e0] \r\n"
1175 "and.l %[e0], %[sh] \r\n"
1176 "eor.l %[sh], %[e0] \r\n"
1177 "lsl.l #8, %[sh] \r\n"
1178 "lsr.l #8, %[e0] \r\n"
1179 "or.l %[sh], %[e0] \r\n"
1180 /* save final feedback into echo buffer */
1181 "move.l %[e0], (%[echo_ptr]) \r\n"
1152 : [e0]"+&d"(echo_0), [e1]"+&d"(echo_1) 1182 : [e0]"+&d"(echo_0), [e1]"+&d"(echo_1)
1153 : [out_0]"r"(out_0), [out_1]"r"(out_1), 1183 : [out_0]"r"(out_0), [out_1]"r"(out_1),
1154 [ef]"r"((int)this->r.g.echo_feedback), 1184 [ef]"r"((int)this->r.g.echo_feedback),
1155 [sh]"r"(1 << 9) 1185 [echo_ptr]"a"((int32_t *)echo_ptr),
1186 [sh]"d"(1 << 9)
1156 ); 1187 );
1157
1158 *(int32_t *)echo_ptr = swap_odd_even32(echo_0);
1159 } 1188 }
1160 1189
1161 /* Output final samples */ 1190 /* Output final samples */
1162 asm volatile ( 1191 asm volatile (
1192 /* fetch output saved in %acc2 and %acc3 */
1163 "movclr.l %%acc2, %[out_0] \r\n" 1193 "movclr.l %%acc2, %[out_0] \r\n"
1164 "movclr.l %%acc3, %[out_1] \r\n" 1194 "movclr.l %%acc3, %[out_1] \r\n"
1195 /* scale right by global_muting shift */
1165 "asr.l %[gm], %[out_0] \r\n" 1196 "asr.l %[gm], %[out_0] \r\n"
1166 "asr.l %[gm], %[out_1] \r\n" 1197 "asr.l %[gm], %[out_1] \r\n"
1167 : [out_0]"=&d"(out_0), [out_1]"=&d"(out_1) 1198 : [out_0]"=&d"(out_0), [out_1]"=&d"(out_1)