summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThom Johansen <thomj@rockbox.org>2007-10-22 23:44:19 +0000
committerThom Johansen <thomj@rockbox.org>2007-10-22 23:44:19 +0000
commit9d9225ed1ddefab985ab3ffd7e77bccf979f1c5b (patch)
tree19e01cb71c3da504525eea6fd0d8836493b2ae12
parent4b259e9553c644e6af393042a728dd4b74da20a9 (diff)
downloadrockbox-9d9225ed1ddefab985ab3ffd7e77bccf979f1c5b.tar.gz
rockbox-9d9225ed1ddefab985ab3ffd7e77bccf979f1c5b.zip
iir_mem16() in assembler for Coldfire for a decent performance boost. Add EMAC init in nb_celp.c, since all modes need this as a base.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@15274 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--apps/codecs/libspeex/SOURCES3
-rw-r--r--apps/codecs/libspeex/config-speex.h5
-rw-r--r--apps/codecs/libspeex/filters.c2
-rw-r--r--apps/codecs/libspeex/filters_cf.S165
-rw-r--r--apps/codecs/libspeex/nb_celp.c3
5 files changed, 178 insertions, 0 deletions
diff --git a/apps/codecs/libspeex/SOURCES b/apps/codecs/libspeex/SOURCES
index df73565734..a5d4318282 100644
--- a/apps/codecs/libspeex/SOURCES
+++ b/apps/codecs/libspeex/SOURCES
@@ -42,3 +42,6 @@ vbr.c
42vorbis_psy.c 42vorbis_psy.c
43vq.c 43vq.c
44window.c 44window.c
45#ifdef CPU_COLDFIRE
46filters_cf.S
47#endif
diff --git a/apps/codecs/libspeex/config-speex.h b/apps/codecs/libspeex/config-speex.h
index b7385eee02..31ebe80741 100644
--- a/apps/codecs/libspeex/config-speex.h
+++ b/apps/codecs/libspeex/config-speex.h
@@ -11,6 +11,11 @@
11#define ARM4_ASM 11#define ARM4_ASM
12#endif 12#endif
13 13
14/* Make use of Coldfire assembly optimizations */
15#if defined(CPU_COLDFIRE)
16#define COLDFIRE_ASM
17#endif
18
14/* Make use of Blackfin assembly optimizations */ 19/* Make use of Blackfin assembly optimizations */
15/* #undef BFIN_ASM */ 20/* #undef BFIN_ASM */
16 21
diff --git a/apps/codecs/libspeex/filters.c b/apps/codecs/libspeex/filters.c
index a6a5f62d26..02f93a27b1 100644
--- a/apps/codecs/libspeex/filters.c
+++ b/apps/codecs/libspeex/filters.c
@@ -45,6 +45,8 @@
45#include "filters_sse.h" 45#include "filters_sse.h"
46#elif defined (ARM4_ASM) || defined(ARM5E_ASM) 46#elif defined (ARM4_ASM) || defined(ARM5E_ASM)
47#include "filters_arm4.h" 47#include "filters_arm4.h"
48#elif defined (COLDFIRE_ASM)
49#define OVERRIDE_IIR_MEM16
48#elif defined (BFIN_ASM) 50#elif defined (BFIN_ASM)
49#include "filters_bfin.h" 51#include "filters_bfin.h"
50#endif 52#endif
diff --git a/apps/codecs/libspeex/filters_cf.S b/apps/codecs/libspeex/filters_cf.S
new file mode 100644
index 0000000000..579af11581
--- /dev/null
+++ b/apps/codecs/libspeex/filters_cf.S
@@ -0,0 +1,165 @@
1/* Copyright (C) 2007 Thom Johansen */
2/**
3 @file filters_cf.S
4 @brief Various analysis/synthesis filters (Coldfire version)
5*/
6/*
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions
9 are met:
10
11 - Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13
14 - Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17
18 - Neither the name of the Xiph.org Foundation nor the names of its
19 contributors may be used to endorse or promote products derived from
20 this software without specific prior written permission.
21
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
26 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
27 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
28 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33*/
34
/*
 * void iir_mem16(const spx_word16_t *x, const spx_coef_t *den,
 *                spx_word16_t *y, int N, int ord, spx_mem_t *mem,
 *                char *stack)
 *
 * All-pole (IIR) filter, ColdFire EMAC version, GAS syntax.  For each of
 * the N samples:
 *
 *     y[i]       = saturate(x[i] + ((mem[0] + 4096) >> 13))
 *     mem[j]     = mem[j+1] - den[j] * y[i]      for j = 0 .. ord-2
 *     mem[ord-1] =          - den[ord-1] * y[i]
 *
 * Arguments are read from the stack.  x, den and y are accessed as 16-bit
 * words and mem as 32-bit longs.  The 'stack' argument is never read.
 *
 * Only ord == 8 and ord == 10 are implemented; any other order returns
 * without touching y or mem.  N <= 0 also returns immediately.
 *
 * The result is saturated to the symmetric range [-32767..32767] so that
 * its negation, which feeds the 16-bit MAC operand, can never overflow.
 * NOTE(review): the portable C iir_mem16 is believed to saturate to the
 * same +/-32767 range - confirm against filters.c.
 *
 * Requirements and side effects:
 *  - The EMAC must be in integer mode (MACSR = 0); that is set up by the
 *    caller, not here.
 *  - acc0-acc3 are used and are left cleared (every read is a movclr).
 *  - d0, d1, a0 and a1 are used without being saved; d2-d7/a2-a6 are
 *    saved in a 44 byte frame (11 registers) and restored on exit.
 *  - The sample count is decremented in place in the N argument slot on
 *    the stack.
 */
    .text
    .global iir_mem16
iir_mem16:
    lea.l   (-44, %sp), %sp
    movem.l %d2-%d7/%a2-%a6, (%sp)
    movem.l (44+4, %sp), %a3-%a5        | a3 = x, a4 = den, a5 = y
    movem.l (44+20, %sp), %d0/%a6       | d0 = ord, a6 = mem
    tst.l   (44+16, %sp)                | The loops below are do-while, so
    jle     .exit                       | bail out early when N <= 0
    moveq.l #8, %d1                     | Jump to correct routine based on ord
    cmp.l   %d1, %d0
    jeq     .order_8
    moveq.l #10, %d1
    cmp.l   %d1, %d0
    jeq     .order_10
    jra     .exit                       | Unsupported order: do nothing

    | d0 = y[i], d1-d7, a0 = mem[0] .. mem[7]
    | a3 = x, a4 = den, a5 = y, a6 = temp
.order_8:
    movem.l (%a6), %d1-%d7/%a0          | Fetch mem[] array
0:
    moveq.l #13, %d0
    add.l   #4096, %d1
    asr.l   %d0, %d1                    | mem[0] >> 13 with rounding
    move.w  (%a3)+, %d0
    ext.l   %d0
    add.l   %d1, %d0                    | Add with x[i]
    move.l  #32767, %d1
    add.l   %d1, %d0                    | Bias legal range to [0..65534]
    cmp.l   #65534, %d0
    jls     2f                          | Unsigned test: in range, no clip
    jgt     1f                          | Signed test on the same flags
    moveq.l #0, %d0                     | Below range (negative): clip low
    jra     2f
1:
    move.l  #65534, %d0                 | Above range: clip high
2:
    sub.l   %d1, %d0                    | Remove bias: [-32767..32767]
    move.w  %d0, (%a5)+                 | Write result to y[i]
    neg.l   %d0                         | d0 = -y[i] for the MACs below,
                                        | msac.w is bugged in gas
    move.l  (%a4)+, %a6                 | Fetch den[0] and den[1]
    mac.w   %a6u, %d0l, %acc0
    mac.w   %a6l, %d0l, (%a4)+, %a6, %acc1  | ...and fetch den[2], den[3]
    mac.w   %a6u, %d0l, %acc2
    mac.w   %a6l, %d0l, (%a4)+, %a6, %acc3  | ...and fetch den[4], den[5]
    movclr.l %acc0, %d1
    add.l   %d2, %d1                    | mem[0] = mem[1] - den[0]*y[i]
    movclr.l %acc1, %d2
    add.l   %d3, %d2                    | mem[1] = mem[2] - den[1]*y[i]
    movclr.l %acc2, %d3
    add.l   %d4, %d3                    | mem[2] = mem[3] - den[2]*y[i]
    movclr.l %acc3, %d4
    add.l   %d5, %d4                    | mem[3] = mem[4] - den[3]*y[i]
    mac.w   %a6u, %d0l, %acc0
    mac.w   %a6l, %d0l, (%a4)+, %a6, %acc1  | ...and fetch den[6], den[7]
    mac.w   %a6u, %d0l, %acc2
    mac.w   %a6l, %d0l, %acc3
    lea.l   (-16, %a4), %a4             | wrap den pointer back to den[0]
    movclr.l %acc0, %d5
    add.l   %d6, %d5                    | mem[4] = mem[5] - den[4]*y[i]
    movclr.l %acc1, %d6
    add.l   %d7, %d6                    | mem[5] = mem[6] - den[5]*y[i]
    movclr.l %acc2, %d7
    add.l   %a0, %d7                    | mem[6] = mem[7] - den[6]*y[i]
    movclr.l %acc3, %a0                 | mem[7] = -den[7]*y[i]
    subq.l  #1, (44+16, %sp)            | Have we done all samples?
    jne     0b
    move.l  (44+24, %sp), %a6           | Fetch mem pointer
    movem.l %d1-%d7/%a0, (%a6)          | Save back mem[]
    jra     .exit

    | d0 = y[i], d1-d7, a0-a2 = mem[0] .. mem[9]
    | a3 = x, a4 = den, a5 = y, a6 = temp
.order_10:
    movem.l (%a6), %d1-%d7/%a0-%a2      | Fetch mem[] array
0:
    moveq.l #13, %d0
    add.l   #4096, %d1
    asr.l   %d0, %d1                    | mem[0] >> 13 with rounding
    move.w  (%a3)+, %d0
    ext.l   %d0
    add.l   %d1, %d0                    | Add with x[i]
    move.l  #32767, %d1
    add.l   %d1, %d0                    | Bias legal range to [0..65534]
    cmp.l   #65534, %d0
    jls     2f                          | Unsigned test: in range, no clip
    jgt     1f                          | Signed test on the same flags
    moveq.l #0, %d0                     | Below range (negative): clip low
    jra     2f
1:
    move.l  #65534, %d0                 | Above range: clip high
2:
    sub.l   %d1, %d0                    | Remove bias: [-32767..32767]
    move.w  %d0, (%a5)+                 | Write result to y[i]
    neg.l   %d0                         | d0 = -y[i] for the MACs below,
                                        | msac.w is bugged in gas
    move.l  (%a4)+, %a6                 | Fetch den[0] and den[1]
    mac.w   %a6u, %d0l, %acc0
    mac.w   %a6l, %d0l, (%a4)+, %a6, %acc1  | ...and fetch den[2], den[3]
    mac.w   %a6u, %d0l, %acc2
    mac.w   %a6l, %d0l, (%a4)+, %a6, %acc3  | ...and fetch den[4], den[5]
    movclr.l %acc0, %d1
    add.l   %d2, %d1                    | mem[0] = mem[1] - den[0]*y[i]
    movclr.l %acc1, %d2
    add.l   %d3, %d2                    | mem[1] = mem[2] - den[1]*y[i]
    movclr.l %acc2, %d3
    add.l   %d4, %d3                    | mem[2] = mem[3] - den[2]*y[i]
    movclr.l %acc3, %d4
    add.l   %d5, %d4                    | mem[3] = mem[4] - den[3]*y[i]
    mac.w   %a6u, %d0l, %acc0
    mac.w   %a6l, %d0l, (%a4)+, %a6, %acc1  | ...and fetch den[6], den[7]
    mac.w   %a6u, %d0l, %acc2
    mac.w   %a6l, %d0l, (%a4)+, %a6, %acc3  | ...and fetch den[8], den[9]
    lea.l   (-20, %a4), %a4             | wrap den pointer back to den[0]
    movclr.l %acc0, %d5
    add.l   %d6, %d5                    | mem[4] = mem[5] - den[4]*y[i]
    movclr.l %acc1, %d6
    add.l   %d7, %d6                    | mem[5] = mem[6] - den[5]*y[i]
    movclr.l %acc2, %d7
    add.l   %a0, %d7                    | mem[6] = mem[7] - den[6]*y[i]
    movclr.l %acc3, %a0
    add.l   %a1, %a0                    | mem[7] = mem[8] - den[7]*y[i]
    mac.w   %a6u, %d0l, %acc0
    mac.w   %a6l, %d0l, %acc1
    movclr.l %acc0, %a1
    add.l   %a2, %a1                    | mem[8] = mem[9] - den[8]*y[i]
    movclr.l %acc1, %a2                 | mem[9] = -den[9]*y[i]

    subq.l  #1, (44+16, %sp)            | Have we done all samples?
    jne     0b
    move.l  (44+24, %sp), %a6           | Fetch mem pointer
    movem.l %d1-%d7/%a0-%a2, (%a6)      | Save back mem[]

.exit:
    movem.l (%sp), %d2-%d7/%a2-%a6
    lea.l   (44, %sp), %sp
    rts
165
diff --git a/apps/codecs/libspeex/nb_celp.c b/apps/codecs/libspeex/nb_celp.c
index 0798ffbcf4..5d167a2aa3 100644
--- a/apps/codecs/libspeex/nb_celp.c
+++ b/apps/codecs/libspeex/nb_celp.c
@@ -1108,6 +1108,9 @@ void *nb_decoder_init(const SpeexMode *m)
1108 st->isWideband = 0; 1108 st->isWideband = 0;
1109 st->highpass_enabled = 1; 1109 st->highpass_enabled = 1;
1110 1110
1111#ifdef CPU_COLDFIRE
1112 coldfire_set_macsr(0); // Integer mode
1113#endif
1111#ifdef ENABLE_VALGRIND 1114#ifdef ENABLE_VALGRIND
1112 VALGRIND_MAKE_READABLE(st, NB_DEC_STACK); 1115 VALGRIND_MAKE_READABLE(st, NB_DEC_STACK);
1113#endif 1116#endif