summary refs log tree commit diff
diff options
context:
space:
mode:
author Michael Sevakis <jethead71@rockbox.org> 2007-02-27 14:25:36 +0000
committer Michael Sevakis <jethead71@rockbox.org> 2007-02-27 14:25:36 +0000
commit 6fbdb912b0416d573dd2656310a5035063df3fe5 (patch)
tree 8d478928169554bf5358c2686d657ada48cc705a
parent 8ca99d3288e416e15b1d48d2b81ea9f8bb774425 (diff)
download rockbox-6fbdb912b0416d573dd2656310a5035063df3fe5.tar.gz
rockbox-6fbdb912b0416d573dd2656310a5035063df3fe5.zip
SWCODEC: Tighten up coldfire assembly a little bit more. Cleanup to make differing parameters between ARM and Coldfire halfway clean. Hopefully those differences can be reconciled soon. A tiny bit of C optimizing for karaoke channel mode.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@12505 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- apps/dsp.c 122
-rw-r--r-- apps/dsp_asm.h 19
-rw-r--r-- apps/dsp_cf.S 171
3 files changed, 159 insertions, 153 deletions
diff --git a/apps/dsp.c b/apps/dsp.c
index f10bdfe2a6..1da7372de6 100644
--- a/apps/dsp.c
+++ b/apps/dsp.c
@@ -112,7 +112,7 @@ struct crossfeed_data
112 int32_t coefs[3]; /* 04h - Coefficients for the shelving filter */ 112 int32_t coefs[3]; /* 04h - Coefficients for the shelving filter */
113 int32_t history[4]; /* 10h - Format is x[n - 1], y[n - 1] for both channels */ 113 int32_t history[4]; /* 10h - Format is x[n - 1], y[n - 1] for both channels */
114 int32_t delay[13][2]; /* 20h */ 114 int32_t delay[13][2]; /* 20h */
115 int index; /* 88h - Current index into the delay line */ 115 int index; /* 88h - Current index/pointer into the delay line */
116 /* 8ch */ 116 /* 8ch */
117}; 117};
118 118
@@ -129,13 +129,21 @@ struct eq_state
129 129
130/* Include header with defines which functions are implemented in assembly 130/* Include header with defines which functions are implemented in assembly
131 code for the target */ 131 code for the target */
132#ifndef SIMULATOR
133#include <dsp_asm.h> 132#include <dsp_asm.h>
134#endif
135 133
136#ifndef DSP_HAVE_ASM_CROSSFEED 134/* Typedefs keep things much neater in this case */
137static void apply_crossfeed(int32_t *buf[], int count); 135typedef int (*sample_input_fn_type)(int count, const char *src[],
138#endif 136 int32_t *dst[]);
137typedef int (*resample_fn_type)(int count, struct dsp_data *data,
138 int32_t *src[], int32_t *dst[]);
139typedef void (*sample_output_fn_type)(int count, struct dsp_data *data,
140 int32_t *src[], int16_t *dst);
141/* If ACF_SWITCHPARAM is no longer needed, make apply_crossfeed of type
142 channels_process_fn_type since it is really just that */
143typedef void (*apply_crossfeed_fn_type)(ACF_SWITCHPARAM(int count,
144 int32_t *buf[]));
145typedef void (*channels_process_fn_type)(int count, int32_t *buf[]);
146
139/* 147/*
140 ***************************************************************************/ 148 ***************************************************************************/
141 149
@@ -151,15 +159,13 @@ struct dsp_config
151 long gain; /* Note that this is in S8.23 format. */ 159 long gain; /* Note that this is in S8.23 format. */
152 /* Functions that change depending upon settings - NULL if stage is 160 /* Functions that change depending upon settings - NULL if stage is
153 disabled */ 161 disabled */
154 int (*input_samples)(int count, const char *src[], int32_t *dst[]); 162 sample_input_fn_type input_samples;
155 int (*resample)(int count, struct dsp_data *data, 163 resample_fn_type resample;
156 int32_t *src[], int32_t *dst[]); 164 sample_output_fn_type output_samples;
157 void (*output_samples)(int count, struct dsp_data *data,
158 int32_t *src[], int16_t *dst);
159 /* These will be NULL for the voice codec, which is more economical that 165 /* These will be NULL for the voice codec, which is more economical that
160 way */ 166 way */
161 void (*apply_crossfeed)(int32_t *src[], int count); 167 apply_crossfeed_fn_type apply_crossfeed;
162 void (*channels_process)(int count, int32_t *buf[]); 168 channels_process_fn_type channels_process;
163}; 169};
164 170
165/* General DSP config */ 171/* General DSP config */
@@ -169,7 +175,14 @@ static struct dither_data dither_data[2] IBSS_ATTR; /* 0=left, 1=right */
169static long dither_mask IBSS_ATTR; 175static long dither_mask IBSS_ATTR;
170static long dither_bias IBSS_ATTR; 176static long dither_bias IBSS_ATTR;
171/* Crossfeed */ 177/* Crossfeed */
172struct crossfeed_data crossfeed_data IBSS_ATTR; /* A */ 178struct crossfeed_data crossfeed_data IDATA_ATTR = /* A */
179{
180#ifdef DSP_CROSSFEED_DELAY_PTR
181 .index = (intptr_t)crossfeed_data.delay
182#else
183 .index = 0
184#endif
185};
173/* Equalizer */ 186/* Equalizer */
174static struct eq_state eq_data; /* A/V */ 187static struct eq_state eq_data; /* A/V */
175#ifdef HAVE_SW_TONE_CONTROLS 188#ifdef HAVE_SW_TONE_CONTROLS
@@ -401,8 +414,7 @@ static int sample_input_gt_native_ni_stereo(
401 */ 414 */
402static void sample_input_new_format(void) 415static void sample_input_new_format(void)
403{ 416{
404 static int (* const sample_input_functions[])( 417 static const sample_input_fn_type sample_input_functions[] =
405 int count, const char* src[], int32_t *dst[]) =
406 { 418 {
407 [SAMPLE_INPUT_LE_NATIVE_MONO] = sample_input_lte_native_mono, 419 [SAMPLE_INPUT_LE_NATIVE_MONO] = sample_input_lte_native_mono,
408 [SAMPLE_INPUT_LE_NATIVE_I_STEREO] = sample_input_lte_native_i_stereo, 420 [SAMPLE_INPUT_LE_NATIVE_I_STEREO] = sample_input_lte_native_i_stereo,
@@ -539,9 +551,7 @@ static void sample_output_dithered(int count, struct dsp_data *data,
539 */ 551 */
540static void sample_output_new_format(void) 552static void sample_output_new_format(void)
541{ 553{
542 static void (* const sample_output_functions[])( 554 static const sample_output_fn_type sample_output_functions[] =
543 int count, struct dsp_data *data,
544 int32_t *src[], int16_t *dst) =
545 { 555 {
546 sample_output_mono, 556 sample_output_mono,
547 sample_output_stereo, 557 sample_output_stereo,
@@ -695,42 +705,13 @@ void dsp_dither_enable(bool enable)
695 switch_dsp(old_dsp); 705 switch_dsp(old_dsp);
696} 706}
697 707
698/**
699 * dsp_set_crossfeed(bool enable)
700 *
701 * !DSPPARAMSYNC
702 * needs syncing with changes to the following dsp parameters:
703 * * dsp->stereo_mode (A)
704 */
705void dsp_set_crossfeed(bool enable)
706{
707 crossfeed_enabled = enable;
708 audio_dsp->apply_crossfeed =
709 (enable && audio_dsp->data.num_channels > 1)
710 ? apply_crossfeed : NULL;
711}
712
713void dsp_set_crossfeed_direct_gain(int gain)
714{
715 crossfeed_data.gain = get_replaygain_int(gain * -10) << 7;
716}
717
718void dsp_set_crossfeed_cross_params(long lf_gain, long hf_gain, long cutoff)
719{
720 long g1 = get_replaygain_int(lf_gain * -10) << 3;
721 long g2 = get_replaygain_int(hf_gain * -10) << 3;
722
723 filter_shelf_coefs(0xffffffff/NATIVE_FREQUENCY*cutoff, g1, g2,
724 crossfeed_data.coefs);
725}
726
727/* Applies crossfeed to the stereo signal in src. 708/* Applies crossfeed to the stereo signal in src.
728 * Crossfeed is a process where listening over speakers is simulated. This 709 * Crossfeed is a process where listening over speakers is simulated. This
729 * is good for old hard panned stereo records, which might be quite fatiguing 710 * is good for old hard panned stereo records, which might be quite fatiguing
730 * to listen to on headphones with no crossfeed. 711 * to listen to on headphones with no crossfeed.
731 */ 712 */
732#ifndef DSP_HAVE_ASM_CROSSFEED 713#ifndef DSP_HAVE_ASM_CROSSFEED
733static void apply_crossfeed(int32_t *buf[], int count) 714static void apply_crossfeed(int count, int32_t *buf[])
734{ 715{
735 int32_t *hist_l = &crossfeed_data.history[0]; 716 int32_t *hist_l = &crossfeed_data.history[0];
736 int32_t *hist_r = &crossfeed_data.history[2]; 717 int32_t *hist_r = &crossfeed_data.history[2];
@@ -775,7 +756,36 @@ static void apply_crossfeed(int32_t *buf[], int count)
775 /* Write back local copies of data we've modified */ 756 /* Write back local copies of data we've modified */
776 crossfeed_data.index = di; 757 crossfeed_data.index = di;
777} 758}
778#endif 759#endif /* DSP_HAVE_ASM_CROSSFEED */
760
761/**
762 * dsp_set_crossfeed(bool enable)
763 *
764 * !DSPPARAMSYNC
765 * needs syncing with changes to the following dsp parameters:
766 * * dsp->stereo_mode (A)
767 */
768void dsp_set_crossfeed(bool enable)
769{
770 crossfeed_enabled = enable;
771 audio_dsp->apply_crossfeed =
772 (enable && audio_dsp->data.num_channels > 1)
773 ? apply_crossfeed : NULL;
774}
775
776void dsp_set_crossfeed_direct_gain(int gain)
777{
778 crossfeed_data.gain = get_replaygain_int(gain * -10) << 7;
779}
780
781void dsp_set_crossfeed_cross_params(long lf_gain, long hf_gain, long cutoff)
782{
783 long g1 = get_replaygain_int(lf_gain * -10) << 3;
784 long g2 = get_replaygain_int(hf_gain * -10) << 3;
785
786 filter_shelf_coefs(0xffffffff/NATIVE_FREQUENCY*cutoff, g1, g2,
787 crossfeed_data.coefs);
788}
779 789
780/* Combine all gains to a global gain. */ 790/* Combine all gains to a global gain. */
781static void set_gain(struct dsp_config *dsp) 791static void set_gain(struct dsp_config *dsp)
@@ -1056,10 +1066,9 @@ static void channels_process_sound_chan_karaoke(int count, int32_t *buf[])
1056 1066
1057 do 1067 do
1058 { 1068 {
1059 int32_t l = *sl/2; 1069 int32_t ch = *sl/2 - *sr/2;
1060 int32_t r = *sr/2; 1070 *sl++ = ch;
1061 *sl++ = l - r; 1071 *sr++ = -ch;
1062 *sr++ = r - l;
1063 } 1072 }
1064 while (--count > 0); 1073 while (--count > 0);
1065} 1074}
@@ -1067,8 +1076,7 @@ static void channels_process_sound_chan_karaoke(int count, int32_t *buf[])
1067 1076
1068void channels_set(int value) 1077void channels_set(int value)
1069{ 1078{
1070 static void (* const channels_process_functions[])( 1079 static const channels_process_fn_type channels_process_functions[] =
1071 int count, int32_t *buf[]) =
1072 { 1080 {
1073 /* SOUND_CHAN_STEREO = All-purpose index for no channel processing */ 1081 /* SOUND_CHAN_STEREO = All-purpose index for no channel processing */
1074 [SOUND_CHAN_STEREO] = NULL, 1082 [SOUND_CHAN_STEREO] = NULL,
@@ -1118,7 +1126,7 @@ int dsp_process(char *dst, const char *src[], int count)
1118 if ((samples = resample(samples, tmp)) <= 0) 1126 if ((samples = resample(samples, tmp)) <= 0)
1119 break; /* I'm pretty sure we're downsampling here */ 1127 break; /* I'm pretty sure we're downsampling here */
1120 if (dsp->apply_crossfeed) 1128 if (dsp->apply_crossfeed)
1121 dsp->apply_crossfeed(tmp, samples); 1129 dsp->apply_crossfeed(ACF_SWITCHPARAM(samples, tmp));
1122 /* TODO: EQ and tone controls need separate structs for audio and voice 1130 /* TODO: EQ and tone controls need separate structs for audio and voice
1123 * DSP processing thanks to filter history. isn't really audible now, but 1131 * DSP processing thanks to filter history. isn't really audible now, but
1124 * might be the day we start handling voice more delicately. 1132 * might be the day we start handling voice more delicately.
diff --git a/apps/dsp_asm.h b/apps/dsp_asm.h
index aaf7e666ec..a9e7fac6b0 100644
--- a/apps/dsp_asm.h
+++ b/apps/dsp_asm.h
@@ -22,10 +22,22 @@
22#ifndef _DSP_ASM_H 22#ifndef _DSP_ASM_H
23#define _DSP_ASM_H 23#define _DSP_ASM_H
24 24
25#define ACF_SWITCHPARAM(count, buf) count, buf
26
27#ifndef SIMULATOR
28
25#if defined(CPU_COLDFIRE) || defined(CPU_ARM) 29#if defined(CPU_COLDFIRE) || defined(CPU_ARM)
26#define DSP_HAVE_ASM_CROSSFEED 30#define DSP_HAVE_ASM_CROSSFEED
27void apply_crossfeed(int32_t *src[], int count); 31#if defined(CPU_COLDFIRE)
32/* ACF_SWITCHPARAM can be stripped out if all have the same parameter
33 order - DSP_CROSSFEED_DELAY_PTR if all use a pointer instead of index */
34#define DSP_CROSSFEED_DELAY_PTR
35#else
36#undef ACF_SWITCHPARAM
37#define ACF_SWITCHPARAM(count, buf) buf, count
28#endif 38#endif
39void apply_crossfeed(ACF_SWITCHPARAM(int count, int32_t *buf[]));
40#endif /* defined(CPU_COLDFIRE) || defined(CPU_ARM) */
29 41
30#if defined (CPU_COLDFIRE) 42#if defined (CPU_COLDFIRE)
31#define DSP_HAVE_ASM_RESAMPLING 43#define DSP_HAVE_ASM_RESAMPLING
@@ -45,5 +57,8 @@ void sample_output_mono(int count, struct dsp_data *data,
45#define DSP_HAVE_ASM_SAMPLE_OUTPUT_STEREO 57#define DSP_HAVE_ASM_SAMPLE_OUTPUT_STEREO
46void sample_output_stereo(int count, struct dsp_data *data, 58void sample_output_stereo(int count, struct dsp_data *data,
47 int32_t *src[], int16_t *dst); 59 int32_t *src[], int16_t *dst);
48#endif 60#endif /* CPU_COLDFIRE */
61
62#endif /* SIMULATOR */
63
49#endif /* _DSP_ASM_H */ 64#endif /* _DSP_ASM_H */
diff --git a/apps/dsp_cf.S b/apps/dsp_cf.S
index 3c48258b5a..497b551be3 100644
--- a/apps/dsp_cf.S
+++ b/apps/dsp_cf.S
@@ -8,6 +8,7 @@
8 * $Id$ 8 * $Id$
9 * 9 *
10 * Copyright (C) 2006 Thom Johansen 10 * Copyright (C) 2006 Thom Johansen
11 * Portions Copyright (C) 2007 Michael Sevakis
11 * 12 *
12 * All files in this archive are subject to the GNU General Public License. 13 * All files in this archive are subject to the GNU General Public License.
13 * See the file COPYING in the source tree root for full license agreement. 14 * See the file COPYING in the source tree root for full license agreement.
@@ -18,75 +19,63 @@
18 ****************************************************************************/ 19 ****************************************************************************/
19 20
20/**************************************************************************** 21/****************************************************************************
21 * void apply_crossfeed(int32_t *src[], int count) 22 * void apply_crossfeed(int count, int32_t *src[])
22 */ 23 */
23 .section .text 24 .section .text
24 .global apply_crossfeed 25 .global apply_crossfeed
25apply_crossfeed: 26apply_crossfeed:
26 lea.l (-44, %sp), %sp 27 lea.l -44(%sp), %sp
27 movem.l %d2-%d7/%a2-%a6, (%sp) | save all regs 28 movem.l %d2-%d7/%a2-%a6, (%sp) | save all regs
28 move.l (44+4, %sp), %a4 29 movem.l 48(%sp), %d7/%a4 | %d7 = count, %a4 = src
29 movem.l (%a4), %a4-%a5 | a4 = src[0], a5 = src[1] 30 movem.l (%a4), %a4-%a5 | %a4 = src[0], %a5 = src[1]
30 move.l (44+8, %sp), %d7 | d7 = count 31 lea.l crossfeed_data, %a1
31 32 move.l (%a1)+, %a6 | a6 = direct gain
32 lea.l crossfeed_data, %a1 33 movem.l 12(%a1), %d0-%d3 | fetch filter history samples
33 lea.l (8*4, %a1), %a0 | a0 = &delay[0][0] 34 move.l 132(%a1), %a0 | fetch delay line address
34 move.l (%a1)+, %a6 | a6 = direct gain 35 movem.l (%a1), %a1-%a3 | load filter coefs
35 movem.l (3*4, %a1), %d0-%d3 | fetch filter history samples
36 move.l (33*4, %a1), %d4 | fetch delay line index
37 movem.l (%a1), %a1-%a3 | load filter coefs
38 move.l %d4, %d5
39 lsl.l #3, %d5
40 add.l %d5, %a0 | point a0 to current delay position
41| lea.l (%d4*4, %a0), %a0
42| lea.l (%d4*4, %a0), %a0 | point a0 to current delay position
43 /* Register usage in loop: 36 /* Register usage in loop:
44 * a0 = &delay[index][0], a1..a3 = b0, b1, a1 (filter coefs), 37 * %a0 = delay_p, %a1..%a3 = b0, b1, a1 (filter coefs),
45 * a4 = src[0], a5 = src[1], a6 = direct gain, 38 * %a4 = src[0], %a5 = src[1], %a6 = direct gain,
46 * d0..d3 = history 39 * %d0..%d3 = history
47 * d4 = delay line index, 40 * %d4..%d6 = temp.
48 * d5,d6 = temp. 41 * %d7 = count
49 * d7 = count
50 */ 42 */
51.cfloop: 43.cfloop:
52 mac.l %a2, %d0, (4, %a0), %d0, %acc0 | acc = b1*dr[n - 1] d0 = dr[n] 44 mac.l %a2, %d0, 4(%a0), %d0, %acc0 | acc = b1*dr[n - 1] d0 = dr[n]
53 mac.l %a1, %d0, %acc0 | acc += b0*dr[n] 45 mac.l %a1, %d0 , %acc0 | acc += b0*dr[n]
54 mac.l %a3, %d1, (%a4), %d5, %acc0 | acc += a1*y_l[n - 1], load left input 46 mac.l %a3, %d1, (%a4), %d4, %acc0 | acc += a1*y_l[n - 1], load L
55 move.l %acc0, %d1 | get filtered delayed sample 47 move.l %acc0, %d1 | get filtered delayed sample
56 mac.l %a6, %d5, %acc0 | acc += gain*x_l[n] 48 mac.l %a6, %d4, %acc0 | acc += gain*x_l[n]
57 movclr.l %acc0, %d6 49 movclr.l %acc0, %d6 |
58 move.l %d6, (%a4)+ | write result 50 move.l %d6, (%a4)+ | write result
59 51
60 mac.l %a2, %d2, (%a0), %d2, %acc0 | acc = b1*dl[n - 1], d2 = dl[n] 52 mac.l %a2, %d2, (%a0), %d2, %acc0 | acc = b1*dl[n - 1], d2 = dl[n]
61 move.l %d5, (%a0)+ | save left input to delay line 53 mac.l %a1, %d2 , %acc0 | acc += b0*dl[n]
62 mac.l %a1, %d2, %acc0 | acc += b0*dl[n] 54 mac.l %a3, %d3, (%a5), %d5, %acc0 | acc += a1*y_r[n - 1], load R
63 mac.l %a3, %d3, (%a5), %d5, %acc0 | acc += a1*y_r[n - 1], load right input 55 movem.l %d4-%d5, (%a0) | save left & right inputs to delay line
64 move.l %acc0, %d3 | get filtered delayed sample 56 move.l %acc0, %d3 | get filtered delayed sample
65 mac.l %a6, %d5, %acc0 | acc += gain*x_r[n] 57 mac.l %a6, %d5, %acc0 | acc += gain*x_r[n]
66 move.l %d5, (%a0)+ | save right input to delay line 58 lea.l 8(%a0), %a0 | increment delay pointer
67 movclr.l %acc0, %d6 59 movclr.l %acc0, %d6 |
68 move.l %d6, (%a5)+ | write result 60 move.l %d6, (%a5)+ | write result
69 61
70 addq.l #1, %d4 | index++ 62 cmpa.l #crossfeed_data+136, %a0| wrap a0 if passed end
71 moveq.l #13, %d6 63 bge.b .cfwrap |
72 cmp.l %d6, %d4 | wrap index to 0 if it overflows 64 .word 0x51fb | tpf.l - trap the buffer wrap
73 jlt .cfnowrap 65.cfwrap:
74 moveq.l #13*8, %d4 66 lea.l -104(%a0), %a0 | wrap
75 sub.l %d4, %a0 | wrap back delay line ptr as well 67 subq.l #1, %d7 | --count > 0 ?
76 clr.l %d4 68 bgt.b .cfloop |
77.cfnowrap: 69 lea.l crossfeed_data+16, %a1 | save data back to struct
78 subq.l #1, %d7 70 movem.l %d0-%d3, (%a1) | ...history
79 jne .cfloop 71 move.l %a0, 120(%a1) | ...delay_p
80 | save data back to struct 72 movem.l (%sp), %d2-%d7/%a2-%a6 | restore all regs
81 lea.l crossfeed_data + 4*4, %a1 73 lea.l 44(%sp), %sp
82 movem.l %d0-%d3, (%a1)
83 move.l %d4, (30*4, %a1)
84 movem.l (%sp), %d2-%d7/%a2-%a6
85 lea.l (44, %sp), %sp
86 rts 74 rts
87.cfend: 75.cfend:
88 .size apply_crossfeed,.cfend-apply_crossfeed 76 .size apply_crossfeed,.cfend-apply_crossfeed
89 77
78
90/**************************************************************************** 79/****************************************************************************
91 * int dsp_downsample(int count, struct dsp_data *data, 80 * int dsp_downsample(int count, struct dsp_data *data,
92 * int32_t *src[], int32_t *dst[]) 81 * int32_t *src[], int32_t *dst[])
@@ -128,10 +117,10 @@ dsp_downsample:
128 lsl.l %d7, %d0 | 117 lsl.l %d7, %d0 |
129 lsr.l #1, %d0 | 118 lsr.l #1, %d0 |
130 mac.l %d0, %d1, %acc0 | %acc0 += frac * diff 119 mac.l %d0, %d1, %acc0 | %acc0 += frac * diff
131 move.l %acc0, %d0 |
132 add.l %d4, %d5 | phase += delta 120 add.l %d4, %d5 | phase += delta
133 move.l %d5, %d6 | pos = phase >> 16 121 move.l %d5, %d6 | pos = phase >> 16
134 lsr.l %d7, %d6 | 122 lsr.l %d7, %d6 |
123 movclr.l %acc0, %d0 |
135 move.l %d0, (%a4)+ | *d++ = %d0 124 move.l %d0, (%a4)+ | *d++ = %d0
136 cmp.l %d2, %d6 | pos < count? 125 cmp.l %d2, %d6 | pos < count?
137 blt.b .dsloop | yes? continue resampling 126 blt.b .dsloop | yes? continue resampling
@@ -145,7 +134,6 @@ dsp_downsample:
145 sub.l (%a2), %d0 | 134 sub.l (%a2), %d0 |
146 asr.l #2, %d0 | convert bytes->samples 135 asr.l #2, %d0 | convert bytes->samples
147 movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables 136 movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables
148 move.l %acc1, %acc0 | clear %acc0
149 lea.l 40(%sp), %sp | cleanup stack 137 lea.l 40(%sp), %sp | cleanup stack
150 rts | buh-bye 138 rts | buh-bye
151.dsend: 139.dsend:
@@ -196,8 +184,8 @@ dsp_upsample:
196.usloop_0: 184.usloop_0:
197 lsr.l #1, %d5 | make phase into frac 185 lsr.l #1, %d5 | make phase into frac
198 mac.l %d1, %d5, %acc0 | %acc0 = diff * frac 186 mac.l %d1, %d5, %acc0 | %acc0 = diff * frac
199 movclr.l %acc0, %d7 | %d7 = product
200 lsl.l #1, %d5 | restore frac to phase 187 lsl.l #1, %d5 | restore frac to phase
188 movclr.l %acc0, %d7 | %d7 = product
201 add.l %d0, %d7 | %d7 = last + product 189 add.l %d0, %d7 | %d7 = last + product
202 move.l %d7, (%a4)+ | *d++ = %d7 190 move.l %d7, (%a4)+ | *d++ = %d7
203 add.l %d4, %d5 | phase += delta 191 add.l %d4, %d5 | phase += delta
@@ -272,10 +260,10 @@ channels_process_sound_chan_custom:
272 move.l dsp_sw_cross, %d4 | load cross (side) gain 260 move.l dsp_sw_cross, %d4 | load cross (side) gain
2731: 2611:
274 move.l (%a0), %d1 | 262 move.l (%a0), %d1 |
275 mac.l %d1, %d3 , (%a1), %d2, %acc0 | L = l*gain + r*cross 263 mac.l %d1, %d3, (%a1), %d2, %acc0 | L = l*gain + r*cross
276 mac.l %d1, %d4 , %acc1 | R = r*gain + l*cross 264 mac.l %d1, %d4 , %acc1 | R = r*gain + l*cross
277 mac.l %d2, %d4 , %acc0 | 265 mac.l %d2, %d4 , %acc0 |
278 mac.l %d2, %d3 , %acc1 | 266 mac.l %d2, %d3 , %acc1 |
279 movclr.l %acc0, %d1 | 267 movclr.l %acc0, %d1 |
280 movclr.l %acc1, %d2 | 268 movclr.l %acc1, %d2 |
281 move.l %d1, (%a0)+ | 269 move.l %d1, (%a0)+ |
@@ -306,15 +294,12 @@ channels_process_sound_chan_karaoke:
306 move.l #0x40000000, %d4 | %d4 = 0.5 294 move.l #0x40000000, %d4 | %d4 = 0.5
3071: 2951:
308 move.l (%a0), %d1 | 296 move.l (%a0), %d1 |
309 mac.l %d1, %d4, (%a1), %d2, %acc0 | L = l/2 - r/2 297 msac.l %d1, %d4, (%a1), %d2, %acc0 | R = r/2 - l/2
310 mac.l %d2, %d4, %acc1 | R = r/2 - l/2 298 mac.l %d2, %d4 , %acc0 |
311 movclr.l %acc0, %d1 | 299 movclr.l %acc0, %d1 |
312 movclr.l %acc1, %d2 | 300 move.l %d1, (%a1)+ |
313 move.l %d1, %d3 | 301 neg.l %d1 | L = -R = -(r/2 - l/2) = l/2 - r/2
314 sub.l %d2, %d1 |
315 sub.l %d3, %d2 |
316 move.l %d1, (%a0)+ | 302 move.l %d1, (%a0)+ |
317 move.l %d2, (%a1)+ |
318 subq.l #1, %d0 | 303 subq.l #1, %d0 |
319 bgt.s 1b | 304 bgt.s 1b |
320 movem.l (%sp), %d1-%d4 | restore registers 305 movem.l (%sp), %d1-%d4 | restore registers
@@ -323,7 +308,6 @@ channels_process_sound_chan_karaoke:
323 rts 308 rts
324.cpkaraoke_end: 309.cpkaraoke_end:
325 .size channels_process_sound_chan_karaoke, .cpkaraoke_end-channels_process_sound_chan_karaoke 310 .size channels_process_sound_chan_karaoke, .cpkaraoke_end-channels_process_sound_chan_karaoke
326
327/**************************************************************************** 311/****************************************************************************
328 * void sample_output_stereo(int count, struct dsp_data *data, 312 * void sample_output_stereo(int count, struct dsp_data *data,
329 * int32_t *src[], int16_t *dst) 313 * int32_t *src[], int16_t *dst)
@@ -382,34 +366,33 @@ sample_output_stereo:
382.sos_lineloop_start: 366.sos_lineloop_start:
383 lea.l -12(%a0), %a5 | %a5 = at or just before last line bound 367 lea.l -12(%a0), %a5 | %a5 = at or just before last line bound
384.sos_lineloop: 368.sos_lineloop:
385 move.l (%a2)+, %d0 | get next 4 L samples and scale
386 mac.l %d0, %a1, (%a2)+, %d1, %acc0 | with saturation
387 mac.l %d1, %a1, (%a2)+, %d2, %acc1 |
388 mac.l %d2, %a1, (%a2)+, %d3, %acc2 |
389 mac.l %d3, %a1, %acc3 |
390 movclr.l %acc0, %d0 | obtain results
391 movclr.l %acc1, %d1 |
392 movclr.l %acc2, %d2 |
393 movclr.l %acc3, %d3 |
394 move.l (%a3)+, %d4 | get next 4 R samples and scale 369 move.l (%a3)+, %d4 | get next 4 R samples and scale
395 mac.l %d4, %a1, (%a3)+, %d5, %acc0 | with saturation 370 mac.l %d4, %a1, (%a3)+, %d5, %acc0 | with saturation
396 mac.l %d5, %a1, (%a3)+, %d6, %acc1 | 371 mac.l %d5, %a1, (%a3)+, %d6, %acc1 |
397 mac.l %d6, %a1, (%a3)+, %d7, %acc2 | 372 mac.l %d6, %a1, (%a3)+, %d7, %acc2 |
398 mac.l %d7, %a1, %acc3 | 373 mac.l %d7, %a1, (%a2)+, %d0, %acc3 |
399 movclr.l %acc0, %d4 | obtain results 374 lea.l 16(%a4), %a4 | increment dest here, mitigate stalls
375 movclr.l %acc0, %d4 | obtain R results
400 movclr.l %acc1, %d5 | 376 movclr.l %acc1, %d5 |
401 movclr.l %acc2, %d6 | 377 movclr.l %acc2, %d6 |
402 movclr.l %acc3, %d7 | 378 movclr.l %acc3, %d7 |
403 swap %d4 | interleave most significant 379 mac.l %d0, %a1, (%a2)+, %d1, %acc0 | get next 4 L samples and scale
404 move.w %d4, %d0 | 16 bits of L and R 380 mac.l %d1, %a1, (%a2)+, %d2, %acc1 | with saturation
381 mac.l %d2, %a1, (%a2)+, %d3, %acc2 |
382 mac.l %d3, %a1 , %acc3 |
383 swap %d4 | a) interleave most significant...
405 swap %d5 | 384 swap %d5 |
406 move.w %d5, %d1 |
407 swap %d6 | 385 swap %d6 |
408 move.w %d6, %d2 |
409 swap %d7 | 386 swap %d7 |
387 movclr.l %acc0, %d0 | obtain L results
388 movclr.l %acc1, %d1 |
389 movclr.l %acc2, %d2 |
390 movclr.l %acc3, %d3 |
391 move.w %d4, %d0 | a) ... 16 bits of L and R
392 move.w %d5, %d1 |
393 move.w %d6, %d2 |
410 move.w %d7, %d3 | 394 move.w %d7, %d3 |
411 movem.l %d0-%d3, (%a4) | write four stereo samples 395 movem.l %d0-%d3, -16(%a4) | write four stereo samples
412 lea.l 16(%a4), %a4 |
413 cmp.l %a4, %a5 | 396 cmp.l %a4, %a5 |
414 bhi.b .sos_lineloop | 397 bhi.b .sos_lineloop |
415.sos_longloop_1_start: 398.sos_longloop_1_start:
@@ -480,7 +463,8 @@ sample_output_mono:
480 mac.l %d0, %d5, (%a2)+, %d1, %acc0 | with saturation 463 mac.l %d0, %d5, (%a2)+, %d1, %acc0 | with saturation
481 mac.l %d1, %d5, (%a2)+, %d2, %acc1 | 464 mac.l %d1, %d5, (%a2)+, %d2, %acc1 |
482 mac.l %d2, %d5, (%a2)+, %d3, %acc2 | 465 mac.l %d2, %d5, (%a2)+, %d3, %acc2 |
483 mac.l %d3, %d5, %acc3 | 466 mac.l %d3, %d5 , %acc3 |
467 lea.l 16(%a3), %a3 | increment dest here, mitigate stalls
484 movclr.l %acc0, %d0 | obtain results 468 movclr.l %acc0, %d0 | obtain results
485 movclr.l %acc1, %d1 | 469 movclr.l %acc1, %d1 |
486 movclr.l %acc2, %d2 | 470 movclr.l %acc2, %d2 |
@@ -497,8 +481,7 @@ sample_output_mono:
497 move.l %d3, %d4 | 481 move.l %d3, %d4 |
498 swap %d4 | 482 swap %d4 |
499 move.w %d4, %d3 | 483 move.w %d4, %d3 |
500 movem.l %d0-%d3, (%a3) | write four stereo samples 484 movem.l %d0-%d3, -16(%a3) | write four stereo samples
501 lea.l 16(%a3), %a3 |
502 cmp.l %a3, %a1 | 485 cmp.l %a3, %a1 |
503 bhi.b .som_lineloop | 486 bhi.b .som_lineloop |
504.som_longloop_1_start: 487.som_longloop_1_start: