diff options
author | Michael Sevakis <jethead71@rockbox.org> | 2010-04-25 20:04:47 +0000 |
---|---|---|
committer | Michael Sevakis <jethead71@rockbox.org> | 2010-04-25 20:04:47 +0000 |
commit | 91bdc3ea90035b3dae19e2f6484ea886eef94433 (patch) | |
tree | e6113841a0e3bbd00e1ad46068f5e1237d4181e7 /apps | |
parent | b9fa116703227ccbaca02d65d8f726f6e31ceebe (diff) | |
download | rockbox-91bdc3ea90035b3dae19e2f6484ea886eef94433.tar.gz rockbox-91bdc3ea90035b3dae19e2f6484ea886eef94433.zip |
Optimized DSP sample out functions for armv6. (For stereo output) ~9% faster than SVN asm and about 4% faster than SVN asm rearranged to observe pipeline hazards.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@25717 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps')
-rw-r--r-- | apps/SOURCES | 3 | ||||
-rw-r--r-- | apps/dsp_arm.S | 5 | ||||
-rw-r--r-- | apps/dsp_arm_v6.S | 127 |
3 files changed, 134 insertions, 1 deletions
diff --git a/apps/SOURCES b/apps/SOURCES index 7bc263a153..66f2a7da51 100644 --- a/apps/SOURCES +++ b/apps/SOURCES | |||
@@ -158,6 +158,9 @@ dsp_cf.S | |||
158 | eq_cf.S | 158 | eq_cf.S |
159 | #elif defined(CPU_ARM) | 159 | #elif defined(CPU_ARM) |
160 | dsp_arm.S | 160 | dsp_arm.S |
161 | #if ARM_ARCH >= 6 | ||
162 | dsp_arm_v6.S | ||
163 | #endif | ||
161 | eq_arm.S | 164 | eq_arm.S |
162 | #endif | 165 | #endif |
163 | #endif | 166 | #endif |
diff --git a/apps/dsp_arm.S b/apps/dsp_arm.S index b90e632782..f924569bc5 100644 --- a/apps/dsp_arm.S +++ b/apps/dsp_arm.S | |||
@@ -18,6 +18,7 @@ | |||
18 | * KIND, either express or implied. | 18 | * KIND, either express or implied. |
19 | * | 19 | * |
20 | ****************************************************************************/ | 20 | ****************************************************************************/ |
21 | #include "config.h" | ||
21 | 22 | ||
22 | /**************************************************************************** | 23 | /**************************************************************************** |
23 | * void channels_process_sound_chan_mono(int count, int32_t *buf[]) | 24 | * void channels_process_sound_chan_mono(int count, int32_t *buf[]) |
@@ -83,7 +84,8 @@ channels_process_sound_chan_karaoke: | |||
83 | ldmfd sp!, {r4-r5, pc} | 84 | ldmfd sp!, {r4-r5, pc} |
84 | .karaokeend: | 85 | .karaokeend: |
85 | .size channels_process_sound_chan_karaoke,.karaokeend-channels_process_sound_chan_karaoke | 86 | .size channels_process_sound_chan_karaoke,.karaokeend-channels_process_sound_chan_karaoke |
86 | 87 | ||
88 | #if ARM_ARCH < 6 | ||
87 | /**************************************************************************** | 89 | /**************************************************************************** |
88 | * void sample_output_mono(int count, struct dsp_data *data, | 90 | * void sample_output_mono(int count, struct dsp_data *data, |
89 | * const int32_t *src[], int16_t *dst) | 91 | * const int32_t *src[], int16_t *dst) |
@@ -195,6 +197,7 @@ sample_output_stereo: | |||
195 | ldmfd sp!, {r4-r10, pc} | 197 | ldmfd sp!, {r4-r10, pc} |
196 | .sosend: | 198 | .sosend: |
197 | .size sample_output_stereo,.sosend-sample_output_stereo | 199 | .size sample_output_stereo,.sosend-sample_output_stereo |
200 | #endif /* ARM_ARCH < 6 */ | ||
198 | 201 | ||
199 | /**************************************************************************** | 202 | /**************************************************************************** |
200 | * void apply_crossfeed(int count, int32_t* src[]) | 203 | * void apply_crossfeed(int count, int32_t* src[]) |
diff --git a/apps/dsp_arm_v6.S b/apps/dsp_arm_v6.S new file mode 100644 index 0000000000..39949498ea --- /dev/null +++ b/apps/dsp_arm_v6.S | |||
@@ -0,0 +1,127 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * $Id$ | ||
9 | * | ||
10 | * Copyright (C) 2010 Michael Sevakis | ||
11 | * | ||
12 | * This program is free software; you can redistribute it and/or | ||
13 | * modify it under the terms of the GNU General Public License | ||
14 | * as published by the Free Software Foundation; either version 2 | ||
15 | * of the License, or (at your option) any later version. | ||
16 | * | ||
17 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
18 | * KIND, either express or implied. | ||
19 | * | ||
20 | ****************************************************************************/ | ||
21 | |||
22 | /**************************************************************************** | ||
23 | * void sample_output_mono(int count, struct dsp_data *data, | ||
24 | * const int32_t *src[], int16_t *dst) | ||
25 | */ | ||
26 | .section .text, "ax", %progbits | ||
27 | .align 2 | ||
28 | .global sample_output_mono | ||
29 | .type sample_output_mono, %function | ||
30 | sample_output_mono: | ||
31 | @ input: r0 = count, r1 = data, r2 = src, r3 = dst | ||
32 | stmfd sp!, { r4, lr } @ save r4 and lr (lr is popped into pc on return) | ||
33 | @ each 32-bit sample is rounded, shifted right by scale, clamped to 16 bits and stored in both halves of one output word | ||
34 | ldr r1, [r1] @ r1 = data->output_scale | ||
35 | ldr r2, [r2] @ r2 = src[0] | ||
36 | @ | ||
37 | mov r4, #1 @ r4 = 1 << (scale - 1) | ||
38 | mov r4, r4, lsl r1 @ r4 = 1 << scale | ||
39 | subs r0, r0, #1 @ odd: end at 0; even: end at -1 | ||
40 | mov r4, r4, lsr #1 @ halve to get the rounding bias (0 when scale is 0); plain mov keeps the subs flags for beq | ||
41 | beq 2f @ Zero? Only one sample! | ||
42 | @ NOTE(review): count == 0 is not special-cased; r0 becomes -1 and the loop below still runs once - confirm callers pass count >= 1 | ||
43 | 1: @ main loop: two samples per iteration | ||
44 | ldmia r2!, { r12, r14 } @ load Mi0, Mi1 | ||
45 | qadd r12, r12, r4 @ round, scale, saturate and | ||
46 | qadd r14, r14, r4 @ pack Mi0 to So0, Mi1 to So1 | ||
47 | mov r12, r12, asr r1 @ arithmetic shift right by scale | ||
48 | mov r14, r14, asr r1 @ | ||
49 | ssat r12, #16, r12 @ clamp to the signed 16-bit range | ||
50 | ssat r14, #16, r14 @ | ||
51 | pkhbt r12, r12, r12, asl #16 @ copy the low halfword into the high halfword as well | ||
52 | pkhbt r14, r14, r14, asl #16 @ | ||
53 | subs r0, r0, #2 @ two samples done; flags feed bgt and ldmltfd below | ||
54 | stmia r3!, { r12, r14 } @ store So0, So1 | ||
55 | bgt 1b @ loop while r0 > 0 | ||
56 | @ | ||
57 | ldmltfd sp!, { r4, pc } @ if count was even, we're done | ||
58 | @ reached with r0 == 0: exactly one sample remains | ||
59 | 2: @ tail: single remaining sample | ||
60 | ldr r12, [r2] @ round, scale, saturate | ||
61 | qadd r12, r12, r4 @ and pack Mi to So | ||
62 | mov r12, r12, asr r1 @ arithmetic shift right by scale | ||
63 | ssat r12, #16, r12 @ clamp to the signed 16-bit range | ||
64 | pkhbt r12, r12, r12, asl #16 @ copy the low halfword into the high halfword as well | ||
65 | str r12, [r3] @ store So | ||
66 | @ | ||
67 | ldmfd sp!, { r4, pc } @ restore r4 and return | ||
68 | .size sample_output_mono, .-sample_output_mono | ||
69 | |||
70 | /**************************************************************************** | ||
71 | * void sample_output_stereo(int count, struct dsp_data *data, | ||
72 | * const int32_t *src[], int16_t *dst) | ||
73 | */ | ||
74 | .section .text, "ax", %progbits | ||
75 | .align 2 | ||
76 | .global sample_output_stereo | ||
77 | .type sample_output_stereo, %function | ||
78 | sample_output_stereo: | ||
79 | @ input: r0 = count, r1 = data, r2 = src, r3 = dst | ||
80 | stmfd sp!, { r4-r7, lr } @ save r4-r7 and lr (lr is popped into pc on return) | ||
81 | @ each L/R pair is rounded, shifted right by scale, clamped to 16 bits and packed as L in the low half, R in the high half of one output word | ||
82 | ldr r1, [r1] @ r1 = data->output_scale | ||
83 | ldmia r2, { r2, r4 } @ r2 = src[0], r4 = src[1] | ||
84 | @ | ||
85 | mov r5, #1 @ r5 = 1 << (scale - 1) | ||
86 | mov r5, r5, lsl r1 @ r5 = 1 << scale | ||
87 | subs r0, r0, #1 @ odd: end at 0; even: end at -1 | ||
88 | mov r5, r5, lsr #1 @ halve to get the rounding bias (0 when scale is 0); plain mov keeps the subs flags for beq | ||
89 | beq 2f @ Zero? Only one sample! | ||
90 | @ NOTE(review): count == 0 is not special-cased; r0 becomes -1 and the loop below still runs once - confirm callers pass count >= 1 | ||
91 | 1: @ main loop: two sample pairs per iteration | ||
92 | ldmia r2!, { r6, r7 } @ r6, r7 = Li0, Li1 | ||
93 | ldmia r4!, { r12, r14 } @ r12, r14 = Ri0, Ri1 | ||
94 | qadd r6, r6, r5 @ round, scale, saturate and pack | ||
95 | qadd r7, r7, r5 @ Li0,Ri0 into So0 and Li1,Ri1 into So1 | ||
96 | qadd r12, r12, r5 @ | ||
97 | qadd r14, r14, r5 @ | ||
98 | mov r6, r6, asr r1 @ arithmetic shift right by scale | ||
99 | mov r7, r7, asr r1 @ | ||
100 | mov r12, r12, asr r1 @ | ||
101 | mov r14, r14, asr r1 @ | ||
102 | ssat r6, #16, r6 @ clamp each value to the signed 16-bit range | ||
103 | ssat r12, #16, r12 @ | ||
104 | ssat r7, #16, r7 @ | ||
105 | ssat r14, #16, r14 @ | ||
106 | pkhbt r6, r6, r12, asl #16 @ So0: Li0 in the low halfword, Ri0 in the high halfword | ||
107 | pkhbt r7, r7, r14, asl #16 @ So1: Li1 in the low halfword, Ri1 in the high halfword | ||
108 | subs r0, r0, #2 @ two pairs done; flags feed bgt and ldmltfd below | ||
109 | stmia r3!, { r6, r7 } @ store So0, So1 | ||
110 | bgt 1b @ loop while r0 > 0 | ||
111 | @ | ||
112 | ldmltfd sp!, { r4-r7, pc } @ if count was even, we're done | ||
113 | @ reached with r0 == 0: exactly one sample pair remains | ||
114 | 2: @ tail: single remaining sample pair | ||
115 | ldr r6, [r2] @ r6 = Li | ||
116 | ldr r12, [r4] @ r12 = Ri | ||
117 | qadd r6, r6, r5 @ round, scale, saturate | ||
118 | qadd r12, r12, r5 @ and pack Li,Ri into So | ||
119 | mov r6, r6, asr r1 @ arithmetic shift right by scale | ||
120 | mov r12, r12, asr r1 @ | ||
121 | ssat r6, #16, r6 @ clamp each value to the signed 16-bit range | ||
122 | ssat r12, #16, r12 @ | ||
123 | pkhbt r6, r6, r12, asl #16 @ So: Li in the low halfword, Ri in the high halfword | ||
124 | str r6, [r3] @ store So | ||
125 | @ | ||
126 | ldmfd sp!, { r4-r7, pc } @ restore r4-r7 and return | ||
127 | .size sample_output_stereo, .-sample_output_stereo | ||