summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNils Wallménius <nils@rockbox.org>2010-06-17 16:49:39 +0000
committerNils Wallménius <nils@rockbox.org>2010-06-17 16:49:39 +0000
commit3d2b1cfa6e3307d3a97a055776ea342cd62f683e (patch)
tree2c499bb5441c7e825ccae84fd67de49df57983ff
parentb6c17c2c82602723a033298fafbc37b9462a9f34 (diff)
downloadrockbox-3d2b1cfa6e3307d3a97a055776ea342cd62f683e.tar.gz
rockbox-3d2b1cfa6e3307d3a97a055776ea342cd62f683e.zip
ARMv6 vector multiplication asm, speeds up Vorbis decoding by about 0.1MHz on Gigabeat S.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@26892 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--apps/codecs/libtremor/asm_arm.h50
1 files changed, 50 insertions, 0 deletions
diff --git a/apps/codecs/libtremor/asm_arm.h b/apps/codecs/libtremor/asm_arm.h
index ada0604a3a..683de7bf64 100644
--- a/apps/codecs/libtremor/asm_arm.h
+++ b/apps/codecs/libtremor/asm_arm.h
@@ -112,6 +112,32 @@ void vect_add_left_right(ogg_int32_t *x, const ogg_int32_t *y, int n)
112 } while (n); 112 } while (n);
113} 113}
114 114
115#if ARM_ARCH >= 6
116static inline
117void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
118{
119 /* Note, mult_fw uses MULT31: each data word is multiplied by the matching
      window word with smmul (which keeps the most significant 32 bits of the
      product) and the result is then shifted left by one. Results are stored
      back over data; both data and window advance by four words per pass.
      The loop only terminates when n reaches exactly 0, so n is presumably
      a positive multiple of 4 -- TODO confirm against callers. */
120 do{
121 asm volatile (
122 "ldmia %[d], {r0, r1, r2, r3};"
123 "ldmia %[w]!, {r4, r5, r6, r7};"
124 "smmul r0, r4, r0;"
125 "smmul r1, r5, r1;"
126 "smmul r2, r6, r2;"
127 "smmul r3, r7, r3;"
128 "mov r0, r0, lsl #1;"
129 "mov r1, r1, lsl #1;"
130 "mov r2, r2, lsl #1;"
131 "mov r3, r3, lsl #1;"
132 "stmia %[d]!, {r0, r1, r2, r3};"
133 : [d] "+r" (data), [w] "+r" (window)
134 : : "r0", "r1", "r2", "r3",
135 "r4", "r5", "r6", "r7",
136 "memory" );
137 n -= 4;
138 } while (n);
139}
140#else
115static inline 141static inline
116void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n) 142void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
117{ 143{
@@ -136,7 +162,30 @@ void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
136 n -= 4; 162 n -= 4;
137 } while (n); 163 } while (n);
138} 164}
165#endif
139 166
167#if ARM_ARCH >= 6
168static inline
169void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
170{
171 /* NOTE mult_bw uses MULT_32, i.e. it doesn't shift the result left at the
      end: only the smmul result (most significant 32 bits of the product) is
      stored back over data. */
172 /* On ARM, we can do the shift at the same time as the overlap-add.
      The window is read with ldmda (descending, with writeback) and r7..r4
      are paired with r0..r3, so window words are applied in reverse order
      while data advances forward. The loop only terminates when n reaches
      exactly 0, so n is presumably a positive multiple of 4 -- TODO confirm
      against callers. */
173 do{
174 asm volatile ("ldmia %[d], {r0, r1, r2, r3};"
175 "ldmda %[w]!, {r4, r5, r6, r7};"
176 "smmul r0, r7, r0;"
177 "smmul r1, r6, r1;"
178 "smmul r2, r5, r2;"
179 "smmul r3, r4, r3;"
180 "stmia %[d]!, {r0, r1, r2, r3};"
181 : [d] "+r" (data), [w] "+r" (window)
182 : : "r0", "r1", "r2", "r3",
183 "r4", "r5", "r6", "r7",
184 "memory" );
185 n -= 4;
186 } while (n);
187}
188#else
140static inline 189static inline
141void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n) 190void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
142{ 191{
@@ -157,6 +206,7 @@ void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
157 n -= 4; 206 n -= 4;
158 } while (n); 207 } while (n);
159} 208}
209#endif
160 210
161static inline void vect_copy(ogg_int32_t *x, const ogg_int32_t *y, int n) 211static inline void vect_copy(ogg_int32_t *x, const ogg_int32_t *y, int n)
162{ 212{