summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Sevakis <jethead71@rockbox.org>2007-02-19 02:49:26 +0000
committerMichael Sevakis <jethead71@rockbox.org>2007-02-19 02:49:26 +0000
commit36175ac9453999d2d079c521126ecc5ac7a8d984 (patch)
treea37e87b5fd7283d1456b7a346e16c1a5ed590a2c
parent2801a87d543f38cadd076330f329c84e23852997 (diff)
downloadrockbox-36175ac9453999d2d079c521126ecc5ac7a8d984.tar.gz
rockbox-36175ac9453999d2d079c521126ecc5ac7a8d984.zip
SWCODEC: DSP optimizations for conversion to internal format and resampling. Assembly resampling for Coldfire. Word has it ARM will get that soon.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@12399 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--apps/dsp.c388
-rw-r--r--apps/dsp.h10
-rw-r--r--apps/dsp_asm.h8
-rw-r--r--apps/dsp_cf.S145
4 files changed, 391 insertions, 160 deletions
diff --git a/apps/dsp.c b/apps/dsp.c
index c7eed8bd76..c062f2c088 100644
--- a/apps/dsp.c
+++ b/apps/dsp.c
@@ -46,6 +46,18 @@
46#define RESAMPLE_BUF_COUNT (256 * 4) /* Enough for 11,025 Hz -> 44,100 Hz*/ 46#define RESAMPLE_BUF_COUNT (256 * 4) /* Enough for 11,025 Hz -> 44,100 Hz*/
47#define DEFAULT_GAIN 0x01000000 47#define DEFAULT_GAIN 0x01000000
48 48
49
50enum
51{
52 CONVERT_LE_NATIVE_I_STEREO = STEREO_INTERLEAVED,
53 CONVERT_LE_NATIVE_NI_STEREO = STEREO_NONINTERLEAVED,
54 CONVERT_LE_NATIVE_MONO = STEREO_MONO,
55 CONVERT_GT_NATIVE_I_STEREO = STEREO_INTERLEAVED + STEREO_NUM_MODES,
56 CONVERT_GT_NATIVE_NI_STEREO = STEREO_NONINTERLEAVED + STEREO_NUM_MODES,
57 CONVERT_GT_NATIVE_MONO = STEREO_MONO + STEREO_NUM_MODES,
58 CONVERT_GT_NATIVE_1ST_INDEX = STEREO_NUM_MODES
59};
60
49struct dsp_config 61struct dsp_config
50{ 62{
51 long codec_frequency; /* Sample rate of data coming from the codec */ 63 long codec_frequency; /* Sample rate of data coming from the codec */
@@ -60,6 +72,7 @@ struct dsp_config
60 int sample_depth; 72 int sample_depth;
61 int sample_bytes; 73 int sample_bytes;
62 int stereo_mode; 74 int stereo_mode;
75 int num_channels;
63 int frac_bits; 76 int frac_bits;
64 bool dither_enabled; 77 bool dither_enabled;
65 long dither_bias; 78 long dither_bias;
@@ -69,11 +82,13 @@ struct dsp_config
69 bool eq_enabled; 82 bool eq_enabled;
70 long eq_precut; 83 long eq_precut;
71 long gain; /* Note that this is in S8.23 format. */ 84 long gain; /* Note that this is in S8.23 format. */
85 int (*convert_to_internal)(const char* src[], int32_t* dst[], int count);
72}; 86};
73 87
74struct resample_data 88struct resample_data
75{ 89{
76 long phase, delta; 90 long phase;
91 long delta;
77 int32_t last_sample[2]; 92 int32_t last_sample[2];
78}; 93};
79 94
@@ -139,88 +154,157 @@ void sound_set_pitch(int permille)
139 * consume. Note that for mono, dst[0] equals dst[1], as there is no point 154 * consume. Note that for mono, dst[0] equals dst[1], as there is no point
140 * in processing the same data twice. 155 * in processing the same data twice.
141 */ 156 */
142static int convert_to_internal(const char* src[], int count, int32_t* dst[]) 157
158/* convert count 16-bit mono to 32-bit mono */
159static int convert_lte_native_mono(
160 const char *src[], int32_t *dst[], int count)
143{ 161{
144 count = MIN(SAMPLE_BUF_COUNT / 2, count); 162 count = MIN(SAMPLE_BUF_COUNT/2, count);
145 163
146 if ((dsp->sample_depth <= NATIVE_DEPTH) 164 const short *s = (short*) src[0];
147 || (dsp->stereo_mode == STEREO_INTERLEAVED)) 165 const short * const send = s + count;
148 { 166 int32_t *d = dst[0] = dst[1] = sample_buf;
149 dst[0] = &sample_buf[0]; 167 const int scale = WORD_SHIFT;
150 dst[1] = (dsp->stereo_mode == STEREO_MONO) 168
151 ? dst[0] : &sample_buf[SAMPLE_BUF_COUNT / 2]; 169 do
152 }
153 else
154 { 170 {
155 dst[0] = (int32_t*) src[0]; 171 *d++ = *s++ << scale;
156 dst[1] = (int32_t*) ((dsp->stereo_mode == STEREO_MONO) ? src[0] : src[1]);
157 } 172 }
173 while (s < send);
158 174
159 if (dsp->sample_depth <= NATIVE_DEPTH) 175 src[0] = (char *)s;
160 {
161 short* s0 = (short*) src[0];
162 int32_t* d0 = dst[0];
163 int32_t* d1 = dst[1];
164 int scale = WORD_SHIFT;
165 int i;
166 176
167 if (dsp->stereo_mode == STEREO_INTERLEAVED) 177 return count;
168 { 178}
169 for (i = 0; i < count; i++)
170 {
171 *d0++ = *s0++ << scale;
172 *d1++ = *s0++ << scale;
173 }
174 }
175 else if (dsp->stereo_mode == STEREO_NONINTERLEAVED)
176 {
177 short* s1 = (short*) src[1];
178 179
179 for (i = 0; i < count; i++) 180/* convert count 16-bit interleaved stereo to 32-bit noninterleaved */
180 { 181static int convert_lte_native_interleaved_stereo(
181 *d0++ = *s0++ << scale; 182 const char *src[], int32_t *dst[], int count)
182 *d1++ = *s1++ << scale; 183{
183 } 184 count = MIN(SAMPLE_BUF_COUNT/2, count);
184 }
185 else
186 {
187 for (i = 0; i < count; i++)
188 {
189 *d0++ = *s0++ << scale;
190 }
191 }
192 }
193 else if (dsp->stereo_mode == STEREO_INTERLEAVED)
194 {
195 int32_t* s0 = (int32_t*) src[0];
196 int32_t* d0 = dst[0];
197 int32_t* d1 = dst[1];
198 int i;
199 185
200 for (i = 0; i < count; i++) 186 const int32_t *s = (int32_t *) src[0];
201 { 187 const int32_t * const send = s + count;
202 *d0++ = *s0++; 188 int32_t *dl = dst[0] = sample_buf;
203 *d1++ = *s0++; 189 int32_t *dr = dst[1] = sample_buf + SAMPLE_BUF_COUNT/2;
204 } 190 const int scale = WORD_SHIFT;
205 }
206 191
207 if (dsp->stereo_mode == STEREO_NONINTERLEAVED) 192 do
208 { 193 {
209 src[0] += count * dsp->sample_bytes; 194 short slr = *s++;
210 src[1] += count * dsp->sample_bytes; 195#ifdef ROCKBOX_LITTLE_ENDIAN
196 *dl++ = (slr >> 16) << scale;
197 *dr++ = (int32_t)(short)slr << scale;
198#else /* ROCKBOX_BIG_ENDIAN */
199 *dl++ = (int32_t)(short)slr << scale;
200 *dr++ = (slr >> 16) << scale;
201#endif
211 } 202 }
212 else if (dsp->stereo_mode == STEREO_INTERLEAVED) 203 while (s < send);
204
205 src[0] = (char *)s;
206
207 return count;
208}
209
210/* convert count 16-bit noninterleaved stereo to 32-bit noninterleaved */
211static int convert_lte_native_noninterleaved_stereo(
212 const char *src[], int32_t *dst[], int count)
213{
214 const short *sl = (short *) src[0];
215 const short *sr = (short *) src[1];
216 const short * const slend = sl + count;
217 int32_t *dl = dst[0] = sample_buf;
218 int32_t *dr = dst[1] = sample_buf + SAMPLE_BUF_COUNT/2;
219 const int scale = WORD_SHIFT;
220
221 do
213 { 222 {
214 src[0] += count * dsp->sample_bytes * 2; 223 *dl++ = *sl++ << scale;
224 *dr++ = *sr++ << scale;
215 } 225 }
216 else 226 while (sl < slend);
227
228 src[0] = (char *)sl;
229 src[1] = (char *)sr;
230
231 return count;
232}
233
234/* convert count 32-bit mono to 32-bit mono */
235static int convert_gt_native_mono(
236 const char *src[], int32_t *dst[], int count)
237{
238 count = MIN(SAMPLE_BUF_COUNT/2, count);
239
240 dst[0] = dst[1] = (int32_t *)src[0];
241 src[0] = (char *)(dst[0] + count);
242
243 return count;
244}
245
246/* convert count 32-bit interleaved stereo to 32-bit noninterleaved stereo */
247static int convert_gt_native_interleaved_stereo(
248 const char *src[], int32_t *dst[], int count)
249{
250 count = MIN(SAMPLE_BUF_COUNT/2, count);
251
252 const int32_t *s = (int32_t *)src[0];
253 const int32_t * const send = s + 2*count;
254 int32_t *dl = sample_buf;
255 int32_t *dr = sample_buf + SAMPLE_BUF_COUNT/2;
256
257 dst[0] = dl;
258 dst[1] = dr;
259
260 do
217 { 261 {
218 src[0] += count * dsp->sample_bytes; 262 *dl++ = *s++;
263 *dr++ = *s++;
219 } 264 }
265 while (s < send);
266
267 src[0] = (char *)send;
268
269 return count;
270}
271
272/* convert 32-bit noninterleaved stereo to 32-bit noninterleaved stereo */
273static int convert_gt_native_noninterleaved_stereo(
274 const char *src[], int32_t *dst[], int count)
275{
276 count = MIN(SAMPLE_BUF_COUNT/2, count);
277
278 dst[0] = (int32_t *)src[0];
279 dst[1] = (int32_t *)src[1];
280 src[0] = (char *)(dst[0] + count);
281 src[1] = (char *)(dst[1] + count);
220 282
221 return count; 283 return count;
222} 284}
223 285
286/* set the to-native sample conversion function based on dsp sample parameters */
287static void new_sample_conversion(void)
288{
289 static int (*convert_to_internal_functions[])(
290 const char* src[], int32_t *dst[], int count) =
291 {
292 [CONVERT_LE_NATIVE_MONO] = convert_lte_native_mono,
293 [CONVERT_LE_NATIVE_I_STEREO] = convert_lte_native_interleaved_stereo,
294 [CONVERT_LE_NATIVE_NI_STEREO] = convert_lte_native_noninterleaved_stereo,
295 [CONVERT_GT_NATIVE_MONO] = convert_gt_native_mono,
296 [CONVERT_GT_NATIVE_I_STEREO] = convert_gt_native_interleaved_stereo,
297 [CONVERT_GT_NATIVE_NI_STEREO] = convert_gt_native_noninterleaved_stereo,
298 };
299
300 int convert = dsp->stereo_mode;
301
302 if (dsp->sample_depth > NATIVE_DEPTH)
303 convert += CONVERT_GT_NATIVE_1ST_INDEX;
304
305 dsp->convert_to_internal = convert_to_internal_functions[convert];
306}
307
224static void resampler_set_delta(int frequency) 308static void resampler_set_delta(int frequency)
225{ 309{
226 resample_data[current_codec].delta = (unsigned long) 310 resample_data[current_codec].delta = (unsigned long)
@@ -230,124 +314,118 @@ static void resampler_set_delta(int frequency)
230/* Linear interpolation resampling that introduces a one sample delay because 314/* Linear interpolation resampling that introduces a one sample delay because
231 * of our inability to look into the future at the end of a frame. 315 * of our inability to look into the future at the end of a frame.
232 */ 316 */
233 317#ifndef DSP_HAVE_ASM_RESAMPLING
234/* TODO: we really should have a separate set of resample functions for both 318static int dsp_downsample(int channels, int count, struct resample_data *r,
235 mono and stereo to avoid all this internal branching and looping. */ 319 int32_t **src, int32_t **dst)
236static int downsample(int32_t **dst, int32_t **src, int count,
237 struct resample_data *r)
238{ 320{
239 long phase = r->phase;
240 long delta = r->delta; 321 long delta = r->delta;
241 int32_t last_sample; 322 long phase, pos;
242 int32_t *d[2] = { dst[0], dst[1] }; 323 int32_t *d;
243 int pos = phase >> 16; 324
244 int i = 1, j; 325 /* Rolled channel loop actually showed slightly faster. */
245 int num_channels = dsp->stereo_mode == STEREO_MONO ? 1 : 2; 326 do
246 327 {
247 for (j = 0; j < num_channels; j++) { 328 /* Just initialize things and not worry too much about the relatively
248 last_sample = r->last_sample[j]; 329 * uncommon case of not being able to spit out a sample for the frame.
330 */
331 int32_t *s = src[--channels];
332 int32_t last = r->last_sample[channels];
333
334 r->last_sample[channels] = s[count - 1];
335 d = dst[channels];
336 phase = r->phase;
337 pos = phase >> 16;
338
249 /* Do we need last sample of previous frame for interpolation? */ 339 /* Do we need last sample of previous frame for interpolation? */
250 if (pos > 0) 340 if (pos > 0)
251 last_sample = src[j][pos - 1]; 341 last = s[pos - 1];
252 342
253 /* Be sure starting position isn't passed the available data */ 343 while (pos < count)
254 if (pos < count)
255 *d[j]++ = last_sample + FRACMUL((phase & 0xffff) << 15,
256 src[j][pos] - last_sample);
257 else
258 { 344 {
259 /* No samples can be output here since were already passed the 345 *d++ = last + FRACMUL((phase & 0xffff) << 15, s[pos] - last);
260 end. Keep phase, save the last sample and return nothing. */ 346 phase += delta;
261 i = 0; 347 pos = phase >> 16;
262 goto done; 348 last = s[pos - 1];
263 } 349 }
264 } 350 }
265 351 while (channels > 0);
266 phase += delta;
267
268 while ((pos = phase >> 16) < count)
269 {
270 for (j = 0; j < num_channels; j++)
271 *d[j]++ = src[j][pos - 1] + FRACMUL((phase & 0xffff) << 15,
272 src[j][pos] - src[j][pos - 1]);
273 phase += delta;
274 i++;
275 }
276 352
277 /* Wrap phase accumulator back to start of next frame. */ 353 /* Wrap phase accumulator back to start of next frame. */
278done:
279 r->phase = phase - (count << 16); 354 r->phase = phase - (count << 16);
280 r->last_sample[0] = src[0][count - 1]; 355 return d - dst[0];
281 r->last_sample[1] = src[1][count - 1];
282 return i;
283} 356}
284 357
285static long upsample(int32_t **dst, int32_t **src, int count, struct resample_data *r) 358static int dsp_upsample(int channels, int count, struct resample_data *r,
359 int32_t **src, int32_t **dst)
286{ 360{
287 long phase = r->phase;
288 long delta = r->delta; 361 long delta = r->delta;
289 int32_t *d[2] = { dst[0], dst[1] }; 362 long phase, pos;
290 int i = 0, j; 363 int32_t *d;
291 int pos;
292 int num_channels = dsp->stereo_mode == STEREO_MONO ? 1 : 2;
293
294 while ((phase >> 16) == 0)
295 {
296 for (j = 0; j < num_channels; j++)
297 *d[j]++ = r->last_sample[j] + FRACMUL((phase & 0xffff) << 15,
298 src[j][0] - r->last_sample[j]);
299 phase += delta;
300 i++;
301 }
302 364
303 while ((pos = phase >> 16) < count) 365 /* Rolled channel loop actually showed slightly faster. */
366 do
304 { 367 {
305 for (j = 0; j < num_channels; j++) 368 /* Should always be able to output a sample for a ratio up to
306 *d[j]++ = src[j][pos - 1] + FRACMUL((phase & 0xffff) << 15, 369 RESAMPLE_BUF_COUNT / SAMPLE_BUF_COUNT. */
307 src[j][pos] - src[j][pos - 1]); 370 int32_t *s = src[--channels];
308 phase += delta; 371 int32_t last = r->last_sample[channels];
309 i++; 372
373 r->last_sample[channels] = s[count - 1];
374 d = dst[channels];
375 phase = r->phase;
376 pos = phase >> 16;
377
378 while (pos == 0)
379 {
380 *d++ = last + FRACMUL((phase & 0xffff) << 15, s[0] - last);
381 phase += delta;
382 pos = phase >> 16;
383 }
384
385 while (pos < count)
386 {
387 last = s[pos - 1];
388 *d++ = last + FRACMUL((phase & 0xffff) << 15, s[pos] - last);
389 phase += delta;
390 pos = phase >> 16;
391 }
310 } 392 }
393 while (channels > 0);
311 394
312 /* Wrap phase accumulator back to start of next frame. */ 395 /* Wrap phase accumulator back to start of next frame. */
313 r->phase = phase - (count << 16); 396 r->phase = phase & 0xffff;
314 r->last_sample[0] = src[0][count - 1]; 397 return d - dst[0];
315 r->last_sample[1] = src[1][count - 1];
316 return i;
317} 398}
399#endif /* DSP_HAVE_ASM_RESAMPLING */
318 400
319/* Resample count stereo samples. Updates the src array, if resampling is 401/* Resample count stereo samples. Updates the src array, if resampling is
320 * done, to refer to the resampled data. Returns number of stereo samples 402 * done, to refer to the resampled data. Returns number of stereo samples
321 * for further processing. 403 * for further processing.
322 */ 404 */
323static inline int resample(int32_t* src[], int count) 405static inline int resample(int32_t *src[], int count)
324{ 406{
325 long new_count; 407 long new_count = count;
326 408
327 if (dsp->frequency != NATIVE_FREQUENCY) 409 if (dsp->frequency != NATIVE_FREQUENCY)
328 { 410 {
329 int32_t* dst[2] = {&resample_buf[0], &resample_buf[RESAMPLE_BUF_COUNT / 2]}; 411 int32_t *dst[2] =
412 {
413 resample_buf,
414 resample_buf + RESAMPLE_BUF_COUNT/2,
415 };
416 int channels = dsp->num_channels;
330 417
331 if (dsp->frequency < NATIVE_FREQUENCY) 418 if (dsp->frequency < NATIVE_FREQUENCY)
332 { 419 new_count = dsp_upsample(channels, count,
333 new_count = upsample(dst, src, count, 420 &resample_data[current_codec],
334 &resample_data[current_codec]); 421 src, dst);
335 }
336 else 422 else
337 { 423 new_count = dsp_downsample(channels, count,
338 new_count = downsample(dst, src, count, 424 &resample_data[current_codec],
339 &resample_data[current_codec]); 425 src, dst);
340 }
341 426
342 src[0] = dst[0]; 427 src[0] = dst[0];
343 if (dsp->stereo_mode != STEREO_MONO) 428 src[1] = dst[channels - 1];
344 src[1] = dst[1];
345 else
346 src[1] = dst[0];
347 }
348 else
349 {
350 new_count = count;
351 } 429 }
352 430
353 return new_count; 431 return new_count;
@@ -378,8 +456,7 @@ void dsp_dither_enable(bool enable)
378 456
379static void dither_init(void) 457static void dither_init(void)
380{ 458{
381 memset(&dither_data[0], 0, sizeof(struct dither_data)); 459 memset(dither_data, 0, sizeof(dither_data));
382 memset(&dither_data[1], 0, sizeof(struct dither_data));
383 dsp->dither_bias = (1L << (dsp->frac_bits - NATIVE_DEPTH)); 460 dsp->dither_bias = (1L << (dsp->frac_bits - NATIVE_DEPTH));
384 dsp->dither_mask = (1L << (dsp->frac_bits + 1 - NATIVE_DEPTH)) - 1; 461 dsp->dither_mask = (1L << (dsp->frac_bits + 1 - NATIVE_DEPTH)) - 1;
385} 462}
@@ -592,7 +669,7 @@ void dsp_set_eq_coefs(int band)
592static void eq_process(int32_t **x, unsigned num) 669static void eq_process(int32_t **x, unsigned num)
593{ 670{
594 int i; 671 int i;
595 unsigned int channels = dsp->stereo_mode != STEREO_MONO ? 2 : 1; 672 unsigned int channels = dsp->num_channels;
596 unsigned shift; 673 unsigned shift;
597 674
598 /* filter configuration currently is 1 low shelf filter, 3 band peaking 675 /* filter configuration currently is 1 low shelf filter, 3 band peaking
@@ -772,7 +849,7 @@ int dsp_process(char *dst, const char *src[], int count)
772 849
773 while (count > 0) 850 while (count > 0)
774 { 851 {
775 samples = convert_to_internal(src, count, tmp); 852 samples = dsp->convert_to_internal(src, tmp, count);
776 count -= samples; 853 count -= samples;
777 apply_gain(tmp, samples); 854 apply_gain(tmp, samples);
778 samples = resample(tmp, samples); 855 samples = resample(tmp, samples);
@@ -886,7 +963,7 @@ bool dsp_configure(int setting, intptr_t value)
886 963
887 case DSP_SET_SAMPLE_DEPTH: 964 case DSP_SET_SAMPLE_DEPTH:
888 dsp->sample_depth = value; 965 dsp->sample_depth = value;
889 966
890 if (dsp->sample_depth <= NATIVE_DEPTH) 967 if (dsp->sample_depth <= NATIVE_DEPTH)
891 { 968 {
892 dsp->frac_bits = WORD_FRACBITS; 969 dsp->frac_bits = WORD_FRACBITS;
@@ -902,15 +979,19 @@ bool dsp_configure(int setting, intptr_t value)
902 dsp->clip_min = -(1 << value); 979 dsp->clip_min = -(1 << value);
903 } 980 }
904 981
982 new_sample_conversion();
905 dither_init(); 983 dither_init();
906 break; 984 break;
907 985
908 case DSP_SET_STEREO_MODE: 986 case DSP_SET_STEREO_MODE:
909 dsp->stereo_mode = (long) value; 987 dsp->stereo_mode = value;
988 dsp->num_channels = value == STEREO_MONO ? 1 : 2;
989 new_sample_conversion();
910 break; 990 break;
911 991
912 case DSP_RESET: 992 case DSP_RESET:
913 dsp->stereo_mode = STEREO_NONINTERLEAVED; 993 dsp->stereo_mode = STEREO_NONINTERLEAVED;
994 dsp->num_channels = 2;
914 dsp->clip_max = ((1 << WORD_FRACBITS) - 1); 995 dsp->clip_max = ((1 << WORD_FRACBITS) - 1);
915 dsp->clip_min = -((1 << WORD_FRACBITS)); 996 dsp->clip_min = -((1 << WORD_FRACBITS));
916 dsp->track_gain = 0; 997 dsp->track_gain = 0;
@@ -921,6 +1002,7 @@ bool dsp_configure(int setting, intptr_t value)
921 dsp->sample_depth = NATIVE_DEPTH; 1002 dsp->sample_depth = NATIVE_DEPTH;
922 dsp->frac_bits = WORD_FRACBITS; 1003 dsp->frac_bits = WORD_FRACBITS;
923 dsp->new_gain = true; 1004 dsp->new_gain = true;
1005 new_sample_conversion();
924 break; 1006 break;
925 1007
926 case DSP_FLUSH: 1008 case DSP_FLUSH:
diff --git a/apps/dsp.h b/apps/dsp.h
index 8e82b6118d..b99ac213ab 100644
--- a/apps/dsp.h
+++ b/apps/dsp.h
@@ -24,9 +24,13 @@
24#include <stdbool.h> 24#include <stdbool.h>
25 25
26#define NATIVE_FREQUENCY 44100 26#define NATIVE_FREQUENCY 44100
27#define STEREO_INTERLEAVED 0 27enum
28#define STEREO_NONINTERLEAVED 1 28{
29#define STEREO_MONO 2 29 STEREO_INTERLEAVED = 0,
30 STEREO_NONINTERLEAVED,
31 STEREO_MONO,
32 STEREO_NUM_MODES,
33};
30 34
31enum { 35enum {
32 CODEC_SET_FILEBUF_WATERMARK = 1, 36 CODEC_SET_FILEBUF_WATERMARK = 1,
diff --git a/apps/dsp_asm.h b/apps/dsp_asm.h
index 04c2848a98..add76a07f8 100644
--- a/apps/dsp_asm.h
+++ b/apps/dsp_asm.h
@@ -27,5 +27,13 @@
27void apply_crossfeed(int32_t* src[], int count); 27void apply_crossfeed(int32_t* src[], int count);
28#endif 28#endif
29 29
30#if defined (CPU_COLDFIRE)
31#define DSP_HAVE_ASM_RESAMPLING
32int dsp_downsample(int channels, int count, void *resample_data,
33 int32_t **src, int32_t **dst);
34int dsp_upsample(int channels, int count, void *resample_data,
35 int32_t **src, int32_t **dst);
30#endif 36#endif
31 37
38#endif /* _DSP_ASM_H */
39
diff --git a/apps/dsp_cf.S b/apps/dsp_cf.S
index 719d1db1d5..233be82860 100644
--- a/apps/dsp_cf.S
+++ b/apps/dsp_cf.S
@@ -17,8 +17,11 @@
17 * 17 *
18 ****************************************************************************/ 18 ****************************************************************************/
19 19
20 .section .text 20/****************************************************************************
21 .global apply_crossfeed 21 * apply_crossfeed(int32_t* src[], int count)
22 */
23 .section .text
24 .global apply_crossfeed
22apply_crossfeed: 25apply_crossfeed:
23 lea.l (-44, %sp), %sp 26 lea.l (-44, %sp), %sp
24 movem.l %d2-%d7/%a2-%a6, (%sp) | save all regs 27 movem.l %d2-%d7/%a2-%a6, (%sp) | save all regs
@@ -67,11 +70,11 @@ apply_crossfeed:
67 addq.l #1, %d4 | index++ 70 addq.l #1, %d4 | index++
68 moveq.l #13, %d6 71 moveq.l #13, %d6
69 cmp.l %d6, %d4 | wrap index to 0 if it overflows 72 cmp.l %d6, %d4 | wrap index to 0 if it overflows
70 jlt .nowrap 73 jlt .cfnowrap
71 moveq.l #13*8, %d4 74 moveq.l #13*8, %d4
72 sub.l %d4, %a0 | wrap back delay line ptr as well 75 sub.l %d4, %a0 | wrap back delay line ptr as well
73 clr.l %d4 76 clr.l %d4
74.nowrap: 77.cfnowrap:
75 subq.l #1, %d7 78 subq.l #1, %d7
76 jne .cfloop 79 jne .cfloop
77 | save data back to struct 80 | save data back to struct
@@ -81,4 +84,138 @@ apply_crossfeed:
81 movem.l (%sp), %d2-%d7/%a2-%a6 84 movem.l (%sp), %d2-%d7/%a2-%a6
82 lea.l (44, %sp), %sp 85 lea.l (44, %sp), %sp
83 rts 86 rts
87.cfend:
88 .size apply_crossfeed,.cfend-apply_crossfeed
84 89
90/****************************************************************************
91 * dsp_downsample(int channels, int count, struct resample_data *r,
92 * in32_t **src, int32_t **dst)
93 */
94 .section .text
95 .global dsp_downsample
96dsp_downsample:
97 lea.l -40(%sp), %sp | save non-clobberables
98 movem.l %d2-%d7/%a2-%a5, (%sp) |
99 movem.l 44(%sp), %d2-%d3/%a0-%a2| %d2 = ch = channels
100 | %d3 = count
101 | %a0 = r
102 | %a1 = src
103 | %a2 = dst
104 move.l 4(%a0), %d4 | %d4 = delta = r->delta
105 move.l #16, %d7 | %d7 = shift
106.dschannel_loop:
107 move.l (%a0), %d5 | %d5 = phase = r->phase
108 move.l -4(%a1, %d2.l*4), %a3 | %a3 = s = src[ch-1]
109 move.l -4(%a2, %d2.l*4), %a4 | %a4 = d = dst[ch-1]
110 lea.l 4(%a0, %d2.l*4), %a5 | %a5 = &r->last_sample[ch-1]
111 move.l (%a5), %d0 | %d0 = last = r->last_sample[ch-1]
112 move.l -4(%a3, %d3.l*4), %d1 | r->last_sample[ch-1] = s[count-1]
113 move.l %d1, (%a5) |
114 move.l %d5, %d6 | %d6 = pos = phase >> 16
115 lsr.l %d7, %d6 |
116 cmp.l %d3, %d6 | past end of samples?
117 bge.b .dsloop_skip | yes? skip loop
118 tst.l %d6 | need last sample of prev. frame?
119 bne.b .dsloop | no? start main loop
120 move.l (%a3, %d6.l*4), %d1 | %d1 = s[pos]
121 bra.b .dsuse_last_start | start with last (last in %d0)
122.dsloop:
123 lea.l -4(%a3, %d6.l*4), %a5 | load s[pos-1] and s[pos]
124 movem.l (%a5), %d0-%d1 |
125.dsuse_last_start:
126 sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1]
127 move.l %d0, %acc0 | %acc0 = previous sample
128 move.l %d5, %d0 | frac = (phase << 16) >> 1
129 lsl.l %d7, %d0 |
130 lsr.l #1, %d0 |
131 mac.l %d0, %d1, %acc0 | %acc0 += frac * diff
132 move.l %acc0, %d0 |
133 add.l %d4, %d5 | phase += delta
134 move.l %d5, %d6 | pos = phase >> 16
135 lsr.l %d7, %d6 |
136 move.l %d0, (%a4)+ | *d++ = %d0
137 cmp.l %d3, %d6 | pos < count?
138 blt.b .dsloop | yes? continue resampling
139.dsloop_skip:
140 subq.l #1, %d2 | ch > 0?
141 bgt.b .dschannel_loop | yes? process next channel
142 asl.l %d7, %d3 | wrap phase to start of next frame
143 sub.l %d3, %d5 | r->phase = phase - (count << 16)
144 move.l %d5, (%a0) |
145 move.l %a4, %d0 | return d - dst[0]
146 sub.l (%a2), %d0 |
147 asr.l #2, %d0 | convert bytes->samples
148 movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables
149 move.l %acc1, %acc0 | clear %acc0
150 lea.l 40(%sp), %sp | cleanup stack
151 rts | buh-bye
152.dsend:
153 .size dsp_downsample,.dsend-dsp_downsample
154
155/****************************************************************************
156 * dsp_upsample(int channels, int count, struct resample_data *r,
157 * int32_t **src, int32_t **dst)
158 */
159 .section .text
160 .global dsp_upsample
161dsp_upsample:
162 lea.l -40(%sp), %sp | save non-clobberables
163 movem.l %d2-%d7/%a2-%a5, (%sp) |
164 movem.l 44(%sp), %d2-%d3/%a0-%a2| %d2 = ch = channels
165 | %d3 = count
166 | %a0 = r
167 | %a1 = src
168 | %a2 = dst
169 move.l 4(%a0), %d4 | %d4 = delta = r->delta
170 swap %d4 | swap delta to high word to use
171 | carries to increment position
172.uschannel_loop:
173 move.l (%a0), %d5 | %d5 = phase = r->phase
174 move.l -4(%a1, %d2.l*4), %a3 | %a3 = s = src[ch-1]
175 move.l -4(%a2, %d2.l*4), %a4 | %a4 = d = dst[ch-1]
176 lea.l 4(%a0, %d2.l*4), %a5 | %a5 = &r->last_sample[ch-1]
177 move.l (%a5), %d0 | %d0 = last = r->last_sample[ch-1]
178 move.l -4(%a3, %d3.l*4), %d1 | r->last_sample[ch-1] = s[count-1]
179 move.l %d1, (%a5) |
180 moveq.l #16, %d1 | %d1 = shift
181 move.l %d5, %d6 | %d6 = pos = phase >> 16
182 lsl.l %d1, %d5 | swap phase to high word to use
183 | carries to increment position
184 lsr.l %d1, %d6 | pos == 0?
185 bne.b .usstart_1 | no? transition from down
186 move.l (%a3), %d1 | %d1 = s[0]
187 sub.l %d0, %d1 | diff = s[pos] - last
188 bra.b .usloop_0 | jump to typical start point
189.usstart_1:
190 cmp.l %d3, %d6 | past end of samples?
191 bge.b .usloop_skip | yes? skip loop
192.usloop_1:
193 lea.l -4(%a3, %d6.l*4), %a5 | load s[pos-1] and s[pos]
194 movem.l (%a5), %d0-%d1 |
195 sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1]
196.usloop_0:
197 move.l %d0, %acc0 | %acc0 = previous sample
198 lsr.l #1, %d5 | make phase into frac
199 mac.l %d1, %d5, %acc0 | %acc0 += diff * frac
200 move.l %acc0, %d7 |
201 lsl.l #1, %d5 | restore frac to phase
202 move.l %d7, (%a4)+ | *d++ = %d7
203 add.l %d4, %d5 | phase += delta
204 bcc.b .usloop_0 | load next values?
205 addq.l #1, %d6 | increment position
206 cmp.l %d3, %d6 | pos < count?
207 blt.b .usloop_1 | yes? continue resampling
208.usloop_skip:
209 subq.l #1, %d2 | ch > 0?
210 bgt.b .uschannel_loop | yes? process next channel
211 swap %d5 | wrap phase to start of next frame
212 move.l %d5, (%a0) | ...and save in r->phase
213 move.l %a4, %d0 | return d - dst[0]
214 sub.l (%a2), %d0 |
215 asr.l #2, %d0 | convert bytes->samples
216 movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables
217 move.l %acc1, %acc0 | clear %acc0
218 lea.l 40(%sp), %sp | cleanup stack
219 rts | buh-bye
220.usend:
221 .size dsp_upsample,.usend-dsp_upsample