summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Sevakis <jethead71@rockbox.org>2010-04-25 20:04:47 +0000
committerMichael Sevakis <jethead71@rockbox.org>2010-04-25 20:04:47 +0000
commit91bdc3ea90035b3dae19e2f6484ea886eef94433 (patch)
treee6113841a0e3bbd00e1ad46068f5e1237d4181e7
parentb9fa116703227ccbaca02d65d8f726f6e31ceebe (diff)
downloadrockbox-91bdc3ea90035b3dae19e2f6484ea886eef94433.tar.gz
rockbox-91bdc3ea90035b3dae19e2f6484ea886eef94433.zip
Optimized DSP sample output functions for ARMv6. For stereo output, they are ~9% faster than the SVN asm and about 4% faster than the SVN asm rearranged to observe pipeline hazards.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@25717 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--apps/SOURCES3
-rw-r--r--apps/dsp_arm.S5
-rw-r--r--apps/dsp_arm_v6.S127
3 files changed, 134 insertions, 1 deletions
diff --git a/apps/SOURCES b/apps/SOURCES
index 7bc263a153..66f2a7da51 100644
--- a/apps/SOURCES
+++ b/apps/SOURCES
@@ -158,6 +158,9 @@ dsp_cf.S
158eq_cf.S 158eq_cf.S
159#elif defined(CPU_ARM) 159#elif defined(CPU_ARM)
160dsp_arm.S 160dsp_arm.S
161#if ARM_ARCH >= 6
162dsp_arm_v6.S
163#endif
161eq_arm.S 164eq_arm.S
162#endif 165#endif
163#endif 166#endif
diff --git a/apps/dsp_arm.S b/apps/dsp_arm.S
index b90e632782..f924569bc5 100644
--- a/apps/dsp_arm.S
+++ b/apps/dsp_arm.S
@@ -18,6 +18,7 @@
18 * KIND, either express or implied. 18 * KIND, either express or implied.
19 * 19 *
20 ****************************************************************************/ 20 ****************************************************************************/
21 #include "config.h"
21 22
22/**************************************************************************** 23/****************************************************************************
23 * void channels_process_sound_chan_mono(int count, int32_t *buf[]) 24 * void channels_process_sound_chan_mono(int count, int32_t *buf[])
@@ -83,7 +84,8 @@ channels_process_sound_chan_karaoke:
83 ldmfd sp!, {r4-r5, pc} 84 ldmfd sp!, {r4-r5, pc}
84.karaokeend: 85.karaokeend:
85 .size channels_process_sound_chan_karaoke,.karaokeend-channels_process_sound_chan_karaoke 86 .size channels_process_sound_chan_karaoke,.karaokeend-channels_process_sound_chan_karaoke
86 87
88#if ARM_ARCH < 6
87/**************************************************************************** 89/****************************************************************************
88 * void sample_output_mono(int count, struct dsp_data *data, 90 * void sample_output_mono(int count, struct dsp_data *data,
89 * const int32_t *src[], int16_t *dst) 91 * const int32_t *src[], int16_t *dst)
@@ -195,6 +197,7 @@ sample_output_stereo:
195 ldmfd sp!, {r4-r10, pc} 197 ldmfd sp!, {r4-r10, pc}
196.sosend: 198.sosend:
197 .size sample_output_stereo,.sosend-sample_output_stereo 199 .size sample_output_stereo,.sosend-sample_output_stereo
200#endif /* ARM_ARCH < 6 */
198 201
199/**************************************************************************** 202/****************************************************************************
200 * void apply_crossfeed(int count, int32_t* src[]) 203 * void apply_crossfeed(int count, int32_t* src[])
diff --git a/apps/dsp_arm_v6.S b/apps/dsp_arm_v6.S
new file mode 100644
index 0000000000..39949498ea
--- /dev/null
+++ b/apps/dsp_arm_v6.S
@@ -0,0 +1,127 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2010 Michael Sevakis
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version 2
15 * of the License, or (at your option) any later version.
16 *
17 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
18 * KIND, either express or implied.
19 *
20 ****************************************************************************/
21
/****************************************************************************
 * void sample_output_mono(int count, struct dsp_data *data,
 *                         const int32_t *src[], int16_t *dst)
 *
 * Converts count 32-bit mono samples read from src[0] into 16-bit output.
 * Each sample is rounded (saturating add of half an output LSB), shifted
 * right by the output scale, saturated to signed 16 bits and duplicated into
 * both halfwords of one 32-bit output word.
 *
 * In:    r0 = count, r1 = data, r2 = src, r3 = dst
 * Out:   nothing
 * Uses:  r0-r3, r12, r14 and the flags; r4 is saved and restored
 *
 * The scale is the word at offset 0 of *data (data->output_scale).
 * dst is written with 32-bit stores (str/stmia), one word per input sample.
 * A count <= 0 returns immediately without touching src[0] or dst.
 */
    .section    .text, "ax", %progbits
    .align      2
    .global     sample_output_mono
    .type       sample_output_mono, %function
sample_output_mono:
    @ input: r0 = count, r1 = data, r2 = src, r3 = dst
    stmfd   sp!, { r4, lr }             @ r4 will hold the rounding bias
                                        @
    ldr     r1, [r1]                    @ r1 = data->output_scale
    ldr     r2, [r2]                    @ r2 = src[0]
                                        @
    mov     r4, #1                      @ r4 = (1 << scale) >> 1
    mov     r4, r4, lsl r1              @ (this is 0 when scale is 0)
    subs    r0, r0, #1                  @ odd: end at 0; even: end at -1
    mov     r4, r4, lsr #1              @ (flags from subs stay intact)
    beq     2f                          @ count was 1: single sample only
    ldmltfd sp!, { r4, pc }             @ count <= 0: nothing to convert
                                        @
1:                                      @ two samples per iteration
    ldmia   r2!, { r12, r14 }           @ load Mi0, Mi1
    qadd    r12, r12, r4                @ round, scale, saturate and
    qadd    r14, r14, r4                @ pack Mi0 to So0, Mi1 to So1
    mov     r12, r12, asr r1            @
    mov     r14, r14, asr r1            @
    ssat    r12, #16, r12               @
    ssat    r14, #16, r14               @
    pkhbt   r12, r12, r12, lsl #16      @ same sample in low and high half
    pkhbt   r14, r14, r14, lsl #16      @
    subs    r0, r0, #2                  @
    stmia   r3!, { r12, r14 }           @ store So0, So1
    bgt     1b                          @
                                        @
    ldmltfd sp!, { r4, pc }             @ r0 = -1: count was even, all done
                                        @
2:                                      @ r0 = 0: one sample remains
    ldr     r12, [r2]                   @ round, scale, saturate
    qadd    r12, r12, r4                @ and pack Mi to So
    mov     r12, r12, asr r1            @
    ssat    r12, #16, r12               @
    pkhbt   r12, r12, r12, lsl #16      @
    str     r12, [r3]                   @ store So
                                        @
    ldmfd   sp!, { r4, pc }             @
    .size   sample_output_mono, .-sample_output_mono
69
/****************************************************************************
 * void sample_output_stereo(int count, struct dsp_data *data,
 *                           const int32_t *src[], int16_t *dst)
 *
 * Converts count 32-bit stereo sample pairs (left from src[0], right from
 * src[1]) into interleaved 16-bit output. Each sample is rounded (saturating
 * add of half an output LSB), shifted right by the output scale and
 * saturated to signed 16 bits. The pair is then packed into one 32-bit word
 * with left in the low halfword and right in the high halfword.
 *
 * In:    r0 = count, r1 = data, r2 = src, r3 = dst
 * Out:   nothing
 * Uses:  r0-r3, r12, r14 and the flags; r4-r7 are saved and restored
 *
 * The scale is the word at offset 0 of *data (data->output_scale).
 * dst is written with 32-bit stores (str/stmia), one word per sample pair.
 * A count <= 0 returns immediately without touching the sources or dst.
 */
    .section    .text, "ax", %progbits
    .align      2
    .global     sample_output_stereo
    .type       sample_output_stereo, %function
sample_output_stereo:
    @ input: r0 = count, r1 = data, r2 = src, r3 = dst
    stmfd   sp!, { r4-r7, lr }          @
                                        @
    ldr     r1, [r1]                    @ r1 = data->output_scale
    ldmia   r2, { r2, r4 }              @ r2 = src[0], r4 = src[1]
                                        @
    mov     r5, #1                      @ r5 = (1 << scale) >> 1
    mov     r5, r5, lsl r1              @ (this is 0 when scale is 0)
    subs    r0, r0, #1                  @ odd: end at 0; even: end at -1
    mov     r5, r5, lsr #1              @ (flags from subs stay intact)
    beq     2f                          @ count was 1: single pair only
    ldmltfd sp!, { r4-r7, pc }          @ count <= 0: nothing to convert
                                        @
1:                                      @ two sample pairs per iteration
    ldmia   r2!, { r6, r7 }             @ r6, r7 = Li0, Li1
    ldmia   r4!, { r12, r14 }           @ r12, r14 = Ri0, Ri1
    qadd    r6, r6, r5                  @ round, scale, saturate and pack
    qadd    r7, r7, r5                  @ Li0:Ri0 to So0, Li1:Ri1 to So1
    qadd    r12, r12, r5                @
    qadd    r14, r14, r5                @
    mov     r6, r6, asr r1              @
    mov     r7, r7, asr r1              @
    mov     r12, r12, asr r1            @
    mov     r14, r14, asr r1            @
    ssat    r6, #16, r6                 @
    ssat    r12, #16, r12               @
    ssat    r7, #16, r7                 @
    ssat    r14, #16, r14               @
    pkhbt   r6, r6, r12, lsl #16        @ low half = left, high half = right
    pkhbt   r7, r7, r14, lsl #16        @
    subs    r0, r0, #2                  @
    stmia   r3!, { r6, r7 }             @ store So0, So1
    bgt     1b                          @
                                        @
    ldmltfd sp!, { r4-r7, pc }          @ r0 = -1: count was even, all done
                                        @
2:                                      @ r0 = 0: one pair remains
    ldr     r6, [r2]                    @ r6 = Li
    ldr     r12, [r4]                   @ r12 = Ri
    qadd    r6, r6, r5                  @ round, scale, saturate
    qadd    r12, r12, r5                @ and pack Li:Ri to So
    mov     r6, r6, asr r1              @
    mov     r12, r12, asr r1            @
    ssat    r6, #16, r6                 @
    ssat    r12, #16, r12               @
    pkhbt   r6, r6, r12, lsl #16        @
    str     r6, [r3]                    @ store So
                                        @
    ldmfd   sp!, { r4-r7, pc }          @
    .size   sample_output_stereo, .-sample_output_stereo