summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--apps/codecs/demac/libdemac/decoder.c13
-rw-r--r--apps/codecs/demac/libdemac/demac_config.h27
-rw-r--r--apps/codecs/demac/libdemac/filter.c41
-rw-r--r--apps/codecs/demac/libdemac/filter.h12
-rw-r--r--apps/codecs/demac/libdemac/vector_math16_arm7.h293
-rw-r--r--apps/codecs/demac/libdemac/vector_math32_armv4.h210
-rw-r--r--apps/codecs/demac/libdemac/vector_math_generic.h (renamed from apps/codecs/demac/libdemac/vector_math16.h)10
7 files changed, 280 insertions, 326 deletions
diff --git a/apps/codecs/demac/libdemac/decoder.c b/apps/codecs/demac/libdemac/decoder.c
index 540db47636..31bcb28b72 100644
--- a/apps/codecs/demac/libdemac/decoder.c
+++ b/apps/codecs/demac/libdemac/decoder.c
@@ -33,15 +33,16 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
33 33
34/* Statically allocate the filter buffers */ 34/* Statically allocate the filter buffers */
35 35
36static int16_t filterbuf32[(32*3 + FILTER_HISTORY_SIZE) * 2] /* 2432 bytes */ 36static filter_int filterbuf32[(32*3 + FILTER_HISTORY_SIZE) * 2]
37 IBSS_ATTR __attribute__((aligned(16))); 37 IBSS_ATTR __attribute__((aligned(16))); /* 2432/4864 bytes */
38static int16_t filterbuf256[(256*3 + FILTER_HISTORY_SIZE) * 2] /* 5120 bytes */ 38static filter_int filterbuf256[(256*3 + FILTER_HISTORY_SIZE) * 2]
39 IBSS_ATTR __attribute__((aligned(16))); 39 IBSS_ATTR __attribute__((aligned(16))); /* 5120/10240 bytes */
40 40
41/* This is only needed for "insane" files, and no current Rockbox targets 41/* This is only needed for "insane" files, and no current Rockbox targets
42 can hope to decode them in realtime, although the Gigabeat S comes close. */ 42 can hope to decode them in realtime, although the Gigabeat S comes close. */
43static int16_t filterbuf1280[(1280*3 + FILTER_HISTORY_SIZE) * 2] /* 17408 bytes */ 43static filter_int filterbuf1280[(1280*3 + FILTER_HISTORY_SIZE) * 2]
44 IBSS_ATTR_DEMAC_INSANEBUF __attribute__((aligned(16))); 44 IBSS_ATTR_DEMAC_INSANEBUF __attribute__((aligned(16)));
45 /* 17408 or 34816 bytes */
45 46
46void init_frame_decoder(struct ape_ctx_t* ape_ctx, 47void init_frame_decoder(struct ape_ctx_t* ape_ctx,
47 unsigned char* inbuffer, int* firstbyte, 48 unsigned char* inbuffer, int* firstbyte,
diff --git a/apps/codecs/demac/libdemac/demac_config.h b/apps/codecs/demac/libdemac/demac_config.h
index 93fda76e25..86c2d24919 100644
--- a/apps/codecs/demac/libdemac/demac_config.h
+++ b/apps/codecs/demac/libdemac/demac_config.h
@@ -39,12 +39,21 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
39 39
40#define APE_OUTPUT_DEPTH 29 40#define APE_OUTPUT_DEPTH 29
41 41
42/* On PP5002 code should go into IRAM. Otherwise put the insane 42/* On ARMv4, using 32 bit ints for the filters is faster. */
43 * filter buffer into IRAM as long as there is no better use. */ 43#if defined(CPU_ARM) && (ARM_ARCH == 4)
44#define FILTER_BITS 32
45#endif
46
44#if CONFIG_CPU == PP5002 47#if CONFIG_CPU == PP5002
48/* Code in IRAM for speed, not enough IRAM for the insane filter buffer. */
45#define ICODE_SECTION_DEMAC_ARM .icode 49#define ICODE_SECTION_DEMAC_ARM .icode
46#define ICODE_ATTR_DEMAC ICODE_ATTR 50#define ICODE_ATTR_DEMAC ICODE_ATTR
47#define IBSS_ATTR_DEMAC_INSANEBUF 51#define IBSS_ATTR_DEMAC_INSANEBUF
52#elif CONFIG_CPU == PP5020
53/* Not enough IRAM for the insane filter buffer. */
54#define ICODE_SECTION_DEMAC_ARM .text
55#define ICODE_ATTR_DEMAC
56#define IBSS_ATTR_DEMAC_INSANEBUF
48#else 57#else
49#define ICODE_SECTION_DEMAC_ARM .text 58#define ICODE_SECTION_DEMAC_ARM .text
50#define ICODE_ATTR_DEMAC 59#define ICODE_ATTR_DEMAC
@@ -75,6 +84,20 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
75 84
76#ifndef PREDICTOR_HISTORY_SIZE 85#ifndef PREDICTOR_HISTORY_SIZE
77#define PREDICTOR_HISTORY_SIZE 512 86#define PREDICTOR_HISTORY_SIZE 512
87#endif
88
89#ifndef FILTER_BITS
90#define FILTER_BITS 16
91#endif
92
93
94#ifndef __ASSEMBLER__
95#include <inttypes.h>
96#if FILTER_BITS == 32
97typedef int32_t filter_int;
98#elif FILTER_BITS == 16
99typedef int16_t filter_int;
100#endif
78#endif 101#endif
79 102
80#endif /* _DEMAC_CONFIG_H */ 103#endif /* _DEMAC_CONFIG_H */
diff --git a/apps/codecs/demac/libdemac/filter.c b/apps/codecs/demac/libdemac/filter.c
index b47a37a041..5601fffcd4 100644
--- a/apps/codecs/demac/libdemac/filter.c
+++ b/apps/codecs/demac/libdemac/filter.c
@@ -28,27 +28,38 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
28#include "demac.h" 28#include "demac.h"
29#include "filter.h" 29#include "filter.h"
30#include "demac_config.h" 30#include "demac_config.h"
31
32#if FILTER_BITS == 32
33
34#if defined(CPU_ARM) && (ARM_ARCH == 4)
35#include "vector_math32_armv4.h"
36#else
37#include "vector_math_generic.h"
38#endif
39
40#else /* FILTER_BITS == 16 */
31 41
32#ifdef CPU_COLDFIRE 42#ifdef CPU_COLDFIRE
33#include "vector_math16_cf.h" 43#include "vector_math16_cf.h"
34#elif ARM_ARCH >= 6 44#elif defined(CPU_ARM) && (ARM_ARCH >= 6)
35#include "vector_math16_armv6.h" 45#include "vector_math16_armv6.h"
36#elif ARM_ARCH >= 5 /* Assume all our ARMv5 targets are ARMv5te(j) */ 46#elif defined(CPU_ARM) && (ARM_ARCH >= 5)
47/* Assume all our ARMv5 targets are ARMv5te(j) */
37#include "vector_math16_armv5te.h" 48#include "vector_math16_armv5te.h"
38#elif defined CPU_ARM7TDMI
39#include "vector_math16_arm7.h"
40#else 49#else
41#include "vector_math16.h" 50#include "vector_math_generic.h"
42#endif 51#endif
43 52
53#endif /* FILTER_BITS */
54
44struct filter_t { 55struct filter_t {
45 int16_t* coeffs; /* ORDER entries */ 56 filter_int* coeffs; /* ORDER entries */
46 57
47 /* We store all the filter delays in a single buffer */ 58 /* We store all the filter delays in a single buffer */
48 int16_t* history_end; 59 filter_int* history_end;
49 60
50 int16_t* delay; 61 filter_int* delay;
51 int16_t* adaptcoeffs; 62 filter_int* adaptcoeffs;
52 63
53 int avg; 64 int avg;
54}; 65};
@@ -89,7 +100,7 @@ struct filter_t {
89#if defined(CPU_ARM) && (ARM_ARCH >= 6) 100#if defined(CPU_ARM) && (ARM_ARCH >= 6)
90#define SATURATE(x) ({int __res; asm("ssat %0, #16, %1" : "=r"(__res) : "r"(x)); __res; }) 101#define SATURATE(x) ({int __res; asm("ssat %0, #16, %1" : "=r"(__res) : "r"(x)); __res; })
91#else 102#else
92#define SATURATE(x) (int16_t)(((x) == (int16_t)(x)) ? (x) : ((x) >> 31) ^ 0x7FFF); 103#define SATURATE(x) (((x) == (int16_t)(x)) ? (x) : ((x) >> 31) ^ 0x7FFF);
93#endif 104#endif
94 105
95/* Apply the filter with state f to count entries in data[] */ 106/* Apply the filter with state f to count entries in data[] */
@@ -145,7 +156,7 @@ static void ICODE_ATTR_DEMAC do_apply_filter_3980(struct filter_t* f,
145 /* Have we filled the history buffer? */ 156 /* Have we filled the history buffer? */
146 if (f->delay == f->history_end) { 157 if (f->delay == f->history_end) {
147 memmove(f->coeffs + ORDER, f->delay - (ORDER*2), 158 memmove(f->coeffs + ORDER, f->delay - (ORDER*2),
148 (ORDER*2) * sizeof(int16_t)); 159 (ORDER*2) * sizeof(filter_int));
149 f->adaptcoeffs = f->coeffs + ORDER*2; 160 f->adaptcoeffs = f->coeffs + ORDER*2;
150 f->delay = f->coeffs + ORDER*3; 161 f->delay = f->coeffs + ORDER*3;
151 } 162 }
@@ -190,7 +201,7 @@ static void ICODE_ATTR_DEMAC do_apply_filter_3970(struct filter_t* f,
190 /* Have we filled the history buffer? */ 201 /* Have we filled the history buffer? */
191 if (f->delay == f->history_end) { 202 if (f->delay == f->history_end) {
192 memmove(f->coeffs + ORDER, f->delay - (ORDER*2), 203 memmove(f->coeffs + ORDER, f->delay - (ORDER*2),
193 (ORDER*2) * sizeof(int16_t)); 204 (ORDER*2) * sizeof(filter_int));
194 f->adaptcoeffs = f->coeffs + ORDER*2; 205 f->adaptcoeffs = f->coeffs + ORDER*2;
195 f->delay = f->coeffs + ORDER*3; 206 f->delay = f->coeffs + ORDER*3;
196 } 207 }
@@ -200,7 +211,7 @@ static void ICODE_ATTR_DEMAC do_apply_filter_3970(struct filter_t* f,
200static struct filter_t filter0 IBSS_ATTR; 211static struct filter_t filter0 IBSS_ATTR;
201static struct filter_t filter1 IBSS_ATTR; 212static struct filter_t filter1 IBSS_ATTR;
202 213
203static void do_init_filter(struct filter_t* f, int16_t* buf) 214static void do_init_filter(struct filter_t* f, filter_int* buf)
204{ 215{
205 f->coeffs = buf; 216 f->coeffs = buf;
206 f->history_end = buf + ORDER*3 + FILTER_HISTORY_SIZE; 217 f->history_end = buf + ORDER*3 + FILTER_HISTORY_SIZE;
@@ -210,13 +221,13 @@ static void do_init_filter(struct filter_t* f, int16_t* buf)
210 f->delay = f->coeffs + ORDER*3; 221 f->delay = f->coeffs + ORDER*3;
211 222
212 /* Zero coefficients and history buffer */ 223 /* Zero coefficients and history buffer */
213 memset(f->coeffs, 0, ORDER*3 * sizeof(int16_t)); 224 memset(f->coeffs, 0, ORDER*3 * sizeof(filter_int));
214 225
215 /* Zero the running average */ 226 /* Zero the running average */
216 f->avg = 0; 227 f->avg = 0;
217} 228}
218 229
219void INIT_FILTER(int16_t* buf) 230void INIT_FILTER(filter_int* buf)
220{ 231{
221 do_init_filter(&filter0, buf); 232 do_init_filter(&filter0, buf);
222 do_init_filter(&filter1, buf + ORDER*3 + FILTER_HISTORY_SIZE); 233 do_init_filter(&filter1, buf + ORDER*3 + FILTER_HISTORY_SIZE);
diff --git a/apps/codecs/demac/libdemac/filter.h b/apps/codecs/demac/libdemac/filter.h
index acbb155b29..bbe51d4572 100644
--- a/apps/codecs/demac/libdemac/filter.h
+++ b/apps/codecs/demac/libdemac/filter.h
@@ -25,21 +25,21 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
25#ifndef _APE_FILTER_H 25#ifndef _APE_FILTER_H
26#define _APE_FILTER_H 26#define _APE_FILTER_H
27 27
28#include <inttypes.h> 28#include "demac_config.h"
29 29
30void init_filter_16_11(int16_t* buf); 30void init_filter_16_11(filter_int* buf);
31int apply_filter_16_11(int fileversion, int32_t* decoded0, int32_t* decoded1, int count); 31int apply_filter_16_11(int fileversion, int32_t* decoded0, int32_t* decoded1, int count);
32 32
33void init_filter_64_11(int16_t* buf); 33void init_filter_64_11(filter_int* buf);
34int apply_filter_64_11(int fileversion, int32_t* decoded0, int32_t* decoded1, int count); 34int apply_filter_64_11(int fileversion, int32_t* decoded0, int32_t* decoded1, int count);
35 35
36void init_filter_32_10(int16_t* buf); 36void init_filter_32_10(filter_int* buf);
37int apply_filter_32_10(int fileversion, int32_t* decoded0, int32_t* decoded1, int count); 37int apply_filter_32_10(int fileversion, int32_t* decoded0, int32_t* decoded1, int count);
38 38
39void init_filter_256_13(int16_t* buf); 39void init_filter_256_13(filter_int* buf);
40int apply_filter_256_13(int fileversion, int32_t* decoded0, int32_t* decoded1, int count); 40int apply_filter_256_13(int fileversion, int32_t* decoded0, int32_t* decoded1, int count);
41 41
42void init_filter_1280_15(int16_t* buf); 42void init_filter_1280_15(filter_int* buf);
43int apply_filter_1280_15(int fileversion, int32_t* decoded0, int32_t* decoded1, int count); 43int apply_filter_1280_15(int fileversion, int32_t* decoded0, int32_t* decoded1, int count);
44 44
45#endif 45#endif
diff --git a/apps/codecs/demac/libdemac/vector_math16_arm7.h b/apps/codecs/demac/libdemac/vector_math16_arm7.h
deleted file mode 100644
index 653bb1f53f..0000000000
--- a/apps/codecs/demac/libdemac/vector_math16_arm7.h
+++ /dev/null
@@ -1,293 +0,0 @@
1/*
2
3libdemac - A Monkey's Audio decoder
4
5$Id$
6
7Copyright (C) Dave Chapman 2007
8
9ARM7 vector math copyright (C) 2007 Jens Arnold
10
11This program is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2 of the License, or
14(at your option) any later version.
15
16This program is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
20
21You should have received a copy of the GNU General Public License
22along with this program; if not, write to the Free Software
23Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
24
25*/
26
27/* This version fetches data as 32 bit words, and *requires* v1 to be
28 * 32 bit aligned, otherwise it will result either in a data abort, or
29 * incorrect results (if ARM aligncheck is disabled). */
30static inline void vector_add(int16_t* v1, int16_t* v2)
31{
32#if ORDER > 16
33 int cnt = ORDER>>4;
34#endif
35
36#define ADDHALFREGS(sum, s1) /* Adds register */ \
37 "mov " #s1 ", " #s1 ", ror #16 \n" /* halves straight. */ \
38 "add r8 , " #s1 ", " #sum ", lsl #16 \n" /* Clobbers 's1' */ \
39 "add " #sum ", " #s1 ", " #sum ", lsr #16 \n" /* and r8. */ \
40 "mov " #sum ", " #sum ", lsl #16 \n" \
41 "orr " #sum ", " #sum ", r8 , lsr #16 \n"
42
43#define ADDHALFXREGS(sum, s1, s2) /* Adds register */ \
44 "add " #s1 ", " #s1 ", " #sum ", lsl #16 \n" /* halves across. */ \
45 "add " #sum ", " #s2 ", " #sum ", lsr #16 \n" /* Clobbers 's1'. */ \
46 "mov " #sum ", " #sum ", lsl #16 \n" \
47 "orr " #sum ", " #sum ", " #s1 ", lsr #16 \n"
48
49 asm volatile (
50 "tst %[v2], #2 \n"
51 "beq 20f \n"
52
53 "10: \n"
54 "ldrh r4, [%[v2]], #2 \n"
55 "mov r4, r4, lsl #16 \n"
56 "1: \n"
57 "ldmia %[v1], {r0-r3} \n"
58 "ldmia %[v2]!, {r5-r8} \n"
59 ADDHALFXREGS(r0, r4, r5)
60 ADDHALFXREGS(r1, r5, r6)
61 ADDHALFXREGS(r2, r6, r7)
62 ADDHALFXREGS(r3, r7, r8)
63 "stmia %[v1]!, {r0-r3} \n"
64 "mov r4, r8 \n"
65 "ldmia %[v1], {r0-r3} \n"
66 "ldmia %[v2]!, {r5-r8} \n"
67 ADDHALFXREGS(r0, r4, r5)
68 ADDHALFXREGS(r1, r5, r6)
69 ADDHALFXREGS(r2, r6, r7)
70 ADDHALFXREGS(r3, r7, r8)
71 "stmia %[v1]!, {r0-r3} \n"
72#if ORDER > 16
73 "mov r4, r8 \n"
74 "subs %[cnt], %[cnt], #1 \n"
75 "bne 1b \n"
76#endif
77 "b 99f \n"
78
79 "20: \n"
80 "1: \n"
81 "ldmia %[v1], {r0-r3} \n"
82 "ldmia %[v2]!, {r4-r7} \n"
83 ADDHALFREGS(r0, r4)
84 ADDHALFREGS(r1, r5)
85 ADDHALFREGS(r2, r6)
86 ADDHALFREGS(r3, r7)
87 "stmia %[v1]!, {r0-r3} \n"
88 "ldmia %[v1], {r0-r3} \n"
89 "ldmia %[v2]!, {r4-r7} \n"
90 ADDHALFREGS(r0, r4)
91 ADDHALFREGS(r1, r5)
92 ADDHALFREGS(r2, r6)
93 ADDHALFREGS(r3, r7)
94 "stmia %[v1]!, {r0-r3} \n"
95#if ORDER > 16
96 "subs %[cnt], %[cnt], #1 \n"
97 "bne 1b \n"
98#endif
99
100 "99: \n"
101 : /* outputs */
102#if ORDER > 16
103 [cnt]"+r"(cnt),
104#endif
105 [v1] "+r"(v1),
106 [v2] "+r"(v2)
107 : /* inputs */
108 : /* clobbers */
109 "r0", "r1", "r2", "r3", "r4",
110 "r5", "r6", "r7", "r8", "memory"
111 );
112}
113
114/* This version fetches data as 32 bit words, and *requires* v1 to be
115 * 32 bit aligned, otherwise it will result either in a data abort, or
116 * incorrect results (if ARM aligncheck is disabled). */
117static inline void vector_sub(int16_t* v1, int16_t* v2)
118{
119#if ORDER > 16
120 int cnt = ORDER>>4;
121#endif
122
123#define SUBHALFREGS(dif, s1) /* Subtracts register */ \
124 "sub r8 , " #dif ", " #s1 "\n" /* halves straight. */ \
125 "and r8 , r8 , r9 \n" /* Needs r9 = 0x0000ffff, */ \
126 "mov " #dif ", " #dif ", lsr #16 \n" /* clobbers r8. */ \
127 "sub " #dif ", " #dif ", " #s1 ", lsr #16 \n" \
128 "orr " #dif ", r8 , " #dif ", lsl #16 \n"
129
130#define SUBHALFXREGS(dif, s1, s2) /* Subtracts register */ \
131 "sub " #s1 ", " #dif ", " #s1 ", lsr #16 \n" /* halves across. */ \
132 "and " #s1 ", " #s1 ", r9 \n" /* Needs r9 = 0x0000ffff, */ \
133 "rsb " #dif ", " #s2 ", " #dif ", lsr #16 \n" /* clobbers 's1'. */ \
134 "orr " #dif ", " #s1 ", " #dif ", lsl #16 \n"
135
136 asm volatile (
137 "mov r9, #0xff \n"
138 "orr r9, r9, #0xff00 \n"
139 "tst %[v2], #2 \n"
140 "beq 20f \n"
141
142 "10: \n"
143 "ldrh r4, [%[v2]], #2 \n"
144 "mov r4, r4, lsl #16 \n"
145 "1: \n"
146 "ldmia %[v1], {r0-r3} \n"
147 "ldmia %[v2]!, {r5-r8} \n"
148 SUBHALFXREGS(r0, r4, r5)
149 SUBHALFXREGS(r1, r5, r6)
150 SUBHALFXREGS(r2, r6, r7)
151 SUBHALFXREGS(r3, r7, r8)
152 "stmia %[v1]!, {r0-r3} \n"
153 "mov r4, r8 \n"
154 "ldmia %[v1], {r0-r3} \n"
155 "ldmia %[v2]!, {r5-r8} \n"
156 SUBHALFXREGS(r0, r4, r5)
157 SUBHALFXREGS(r1, r5, r6)
158 SUBHALFXREGS(r2, r6, r7)
159 SUBHALFXREGS(r3, r7, r8)
160 "stmia %[v1]!, {r0-r3} \n"
161#if ORDER > 16
162 "mov r4, r8 \n"
163 "subs %[cnt], %[cnt], #1 \n"
164 "bne 1b \n"
165#endif
166 "b 99f \n"
167
168 "20: \n"
169 "1: \n"
170 "ldmia %[v1], {r0-r3} \n"
171 "ldmia %[v2]!, {r4-r7} \n"
172 SUBHALFREGS(r0, r4)
173 SUBHALFREGS(r1, r5)
174 SUBHALFREGS(r2, r6)
175 SUBHALFREGS(r3, r7)
176 "stmia %[v1]!, {r0-r3} \n"
177 "ldmia %[v1], {r0-r3} \n"
178 "ldmia %[v2]!, {r4-r7} \n"
179 SUBHALFREGS(r0, r4)
180 SUBHALFREGS(r1, r5)
181 SUBHALFREGS(r2, r6)
182 SUBHALFREGS(r3, r7)
183 "stmia %[v1]!, {r0-r3} \n"
184#if ORDER > 16
185 "subs %[cnt], %[cnt], #1 \n"
186 "bne 1b \n"
187#endif
188
189 "99: \n"
190 : /* outputs */
191#if ORDER > 16
192 [cnt]"+r"(cnt),
193#endif
194 [v1] "+r"(v1),
195 [v2] "+r"(v2)
196 : /* inputs */
197 : /* clobbers */
198 "r0", "r1", "r2", "r3", "r4", "r5",
199 "r6", "r7", "r8", "r9", "memory"
200 );
201}
202
203/* This version fetches data as 32 bit words, and *requires* v1 to be
204 * 32 bit aligned, otherwise it will result either in a data abort, or
205 * incorrect results (if ARM aligncheck is disabled). It is optimised
206 * for ARM7TDMI. Using it for ARM9 or higher results in worse performance
207 * than the C version. */
208static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
209{
210 int res = 0;
211#if ORDER > 16
212 int cnt = ORDER>>4;
213#endif
214
215#define MLABLOCK2(f1, f2) \
216 "mov r8, " #f1 ", lsl #16 \n" \
217 "mov r8, r8 , asr #16 \n" \
218 "mov r9, " #f2 ", lsl #16 \n" \
219 "mov r9, r9 , asr #16 \n" \
220 "mla %[res], r9, r8, %[res] \n" \
221 "mov r8, " #f1 ", asr #16 \n" \
222 "mov r9, " #f2 ", asr #16 \n" \
223 "mla %[res], r9, r8, %[res] \n"
224
225#define MLABLOCK2_U2(f1, f2) \
226 "mov r8, " #f1 ", lsl #16 \n" \
227 "mov r8, r8 , asr #16 \n" \
228 "mla %[res], r9, r8, %[res] \n" \
229 "mov r8, " #f1 ", asr #16 \n" \
230 "mov r9, " #f2 ", lsl #16 \n" \
231 "mov r9, r9 , asr #16 \n" \
232 "mla %[res], r9, r8, %[res] \n" \
233 "mov r9, " #f2 ", asr #16 \n"
234
235 asm volatile (
236 "tst %[v2], #2 \n"
237 "beq 20f \n"
238
239 "10: \n"
240 "ldrsh r9, [%[v2]], #2 \n"
241 "1: \n"
242 "ldmia %[v1]!, {r0-r3} \n"
243 "ldmia %[v2]!, {r4-r7} \n"
244 MLABLOCK2_U2(r0, r4)
245 MLABLOCK2_U2(r1, r5)
246 MLABLOCK2_U2(r2, r6)
247 MLABLOCK2_U2(r3, r7)
248 "ldmia %[v1]!, {r0-r3} \n"
249 "ldmia %[v2]!, {r4-r7} \n"
250 MLABLOCK2_U2(r0, r4)
251 MLABLOCK2_U2(r1, r5)
252 MLABLOCK2_U2(r2, r6)
253 MLABLOCK2_U2(r3, r7)
254#if ORDER > 16
255 "subs %[cnt], %[cnt], #1 \n"
256 "bne 1b \n"
257#endif
258 "b 99f \n"
259
260 "20: \n"
261 "1: \n"
262 "ldmia %[v1]!, {r0-r3} \n"
263 "ldmia %[v2]!, {r4-r7} \n"
264 MLABLOCK2(r0, r4)
265 MLABLOCK2(r1, r5)
266 MLABLOCK2(r2, r6)
267 MLABLOCK2(r3, r7)
268 "ldmia %[v1]!, {r0-r3} \n"
269 "ldmia %[v2]!, {r4-r7} \n"
270 MLABLOCK2(r0, r4)
271 MLABLOCK2(r1, r5)
272 MLABLOCK2(r2, r6)
273 MLABLOCK2(r3, r7)
274#if ORDER > 16
275 "subs %[cnt], %[cnt], #1 \n"
276 "bne 1b \n"
277#endif
278
279 "99: \n"
280 : /* outputs */
281#if ORDER > 16
282 [cnt]"+r"(cnt),
283#endif
284 [v1] "+r"(v1),
285 [v2] "+r"(v2),
286 [res]"+r"(res)
287 : /* inputs */
288 : /* clobbers */
289 "r0", "r1", "r2", "r3", "r4",
290 "r5", "r6", "r7", "r8", "r9"
291 );
292 return res;
293}
diff --git a/apps/codecs/demac/libdemac/vector_math32_armv4.h b/apps/codecs/demac/libdemac/vector_math32_armv4.h
new file mode 100644
index 0000000000..b729bd3a0a
--- /dev/null
+++ b/apps/codecs/demac/libdemac/vector_math32_armv4.h
@@ -0,0 +1,210 @@
1/*
2
3libdemac - A Monkey's Audio decoder
4
5$Id$
6
7Copyright (C) Dave Chapman 2007
8
9ARMv4 vector math copyright (C) 2008 Jens Arnold
10
11This program is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2 of the License, or
14(at your option) any later version.
15
16This program is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
20
21You should have received a copy of the GNU General Public License
22along with this program; if not, write to the Free Software
23Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
24
25*/
26
27static inline void vector_add(int32_t* v1, int32_t* v2)
28{
29#if ORDER > 32
30 int cnt = ORDER>>5;
31#endif
32
33#define ADDBLOCK4 \
34 "ldmia %[v1], {r0-r3} \n" \
35 "ldmia %[v2]!, {r4-r7} \n" \
36 "add r0, r0, r4 \n" \
37 "add r1, r1, r5 \n" \
38 "add r2, r2, r6 \n" \
39 "add r3, r3, r7 \n" \
40 "stmia %[v1]!, {r0-r3} \n"
41
42 asm volatile (
43 "1: \n"
44 ADDBLOCK4
45 ADDBLOCK4
46 ADDBLOCK4
47 ADDBLOCK4
48#if ORDER > 16
49 ADDBLOCK4
50 ADDBLOCK4
51 ADDBLOCK4
52 ADDBLOCK4
53#endif
54#if ORDER > 32
55 "subs %[cnt], %[cnt], #1 \n"
56 "bne 1b \n"
57#endif
58 : /* outputs */
59#if ORDER > 32
60 [cnt]"+r"(cnt),
61#endif
62 [v1] "+r"(v1),
63 [v2] "+r"(v2)
64 : /* inputs */
65 : /* clobbers */
66 "r0", "r1", "r2", "r3", "r4",
67 "r5", "r6", "r7", "memory"
68 );
69}
70
71static inline void vector_sub(int32_t* v1, int32_t* v2)
72{
73#if ORDER > 32
74 int cnt = ORDER>>5;
75#endif
76
77#define SUBBLOCK4 \
78 "ldmia %[v1], {r0-r3} \n" \
79 "ldmia %[v2]!, {r4-r7} \n" \
80 "sub r0, r0, r4 \n" \
81 "sub r1, r1, r5 \n" \
82 "sub r2, r2, r6 \n" \
83 "sub r3, r3, r7 \n" \
84 "stmia %[v1]!, {r0-r3} \n"
85
86 asm volatile (
87 "1: \n"
88 SUBBLOCK4
89 SUBBLOCK4
90 SUBBLOCK4
91 SUBBLOCK4
92#if ORDER > 16
93 SUBBLOCK4
94 SUBBLOCK4
95 SUBBLOCK4
96 SUBBLOCK4
97#endif
98#if ORDER > 32
99 "subs %[cnt], %[cnt], #1 \n"
100 "bne 1b \n"
101#endif
102 : /* outputs */
103#if ORDER > 32
104 [cnt]"+r"(cnt),
105#endif
106 [v1] "+r"(v1),
107 [v2] "+r"(v2)
108 : /* inputs */
109 : /* clobbers */
110 "r0", "r1", "r2", "r3", "r4",
111 "r5", "r6", "r7", "memory"
112 );
113}
114
115static inline int32_t scalarproduct(int32_t* v1, int32_t* v2)
116{
117 int res = 0;
118#if ORDER > 32
119 int cnt = ORDER>>5;
120#endif
121
122 asm volatile (
123#if ORDER > 16
124 "ldmia %[v2]!, {r6-r7} \n"
125 "1: \n"
126 "ldmia %[v1]!, {r0,r1,r3-r5} \n"
127 "mla %[res], r6, r0, %[res] \n"
128 "mla %[res], r7, r1, %[res] \n"
129 "ldmia %[v2]!, {r0-r2,r6-r8} \n"
130 "mla %[res], r0, r3, %[res] \n"
131 "mla %[res], r1, r4, %[res] \n"
132 "mla %[res], r2, r5, %[res] \n"
133 "ldmia %[v1]!, {r0-r4} \n"
134 "mla %[res], r6, r0, %[res] \n"
135 "mla %[res], r7, r1, %[res] \n"
136 "mla %[res], r8, r2, %[res] \n"
137 "ldmia %[v2]!, {r0,r1,r6-r8} \n"
138 "mla %[res], r0, r3, %[res] \n"
139 "mla %[res], r1, r4, %[res] \n"
140 "ldmia %[v1]!, {r0-r5} \n"
141 "mla %[res], r6, r0, %[res] \n"
142 "mla %[res], r7, r1, %[res] \n"
143 "mla %[res], r8, r2, %[res] \n"
144 "ldmia %[v2]!, {r0-r2,r6,r7} \n"
145 "mla %[res], r0, r3, %[res] \n"
146 "mla %[res], r1, r4, %[res] \n"
147 "mla %[res], r2, r5, %[res] \n"
148 "ldmia %[v1]!, {r0,r1,r3-r5} \n"
149 "mla %[res], r6, r0, %[res] \n"
150 "mla %[res], r7, r1, %[res] \n"
151 "ldmia %[v2]!, {r0-r2,r6-r8} \n"
152 "mla %[res], r0, r3, %[res] \n"
153 "mla %[res], r1, r4, %[res] \n"
154 "mla %[res], r2, r5, %[res] \n"
155 "ldmia %[v1]!, {r0-r4} \n"
156 "mla %[res], r6, r0, %[res] \n"
157 "mla %[res], r7, r1, %[res] \n"
158 "mla %[res], r8, r2, %[res] \n"
159 "ldmia %[v2]!, {r0,r1,r6-r8} \n"
160 "mla %[res], r0, r3, %[res] \n"
161 "mla %[res], r1, r4, %[res] \n"
162 "ldmia %[v1]!, {r0-r5} \n"
163 "mla %[res], r6, r0, %[res] \n"
164 "mla %[res], r7, r1, %[res] \n"
165 "mla %[res], r8, r2, %[res] \n"
166#if ORDER > 32
167 "ldmia %[v2]!, {r0-r2,r6,r7} \n"
168#else
169 "ldmia %[v2]!, {r0-r2} \n"
170#endif
171 "mla %[res], r0, r3, %[res] \n"
172 "mla %[res], r1, r4, %[res] \n"
173 "mla %[res], r2, r5, %[res] \n"
174#if ORDER > 32
175 "subs %[cnt], %[cnt], #1 \n"
176 "bne 1b \n"
177#endif
178
179#else /* ORDER <= 16 */
180
181#define MLABLOCK4 \
182 "ldmia %[v1]!, {r0-r3} \n" \
183 "ldmia %[v2]!, {r4-r7} \n" \
184 "mla %[res], r4, r0, %[res] \n" \
185 "mla %[res], r5, r1, %[res] \n" \
186 "mla %[res], r6, r2, %[res] \n" \
187 "mla %[res], r7, r3, %[res] \n"
188
189 MLABLOCK4
190 MLABLOCK4
191 MLABLOCK4
192 MLABLOCK4
193#endif /* ORDER <= 16 */
194 : /* outputs */
195#if ORDER > 32
196 [cnt]"+r"(cnt),
197#endif
198 [v1] "+r"(v1),
199 [v2] "+r"(v2),
200 [res]"+r"(res)
201 : /* inputs */
202 : /* clobbers */
203 "r0", "r1", "r2", "r3",
204 "r4", "r5", "r6", "r7"
205#if ORDER > 16
206 ,"r8"
207#endif
208 );
209 return res;
210}
diff --git a/apps/codecs/demac/libdemac/vector_math16.h b/apps/codecs/demac/libdemac/vector_math_generic.h
index 5d82abe930..7b61db77be 100644
--- a/apps/codecs/demac/libdemac/vector_math16.h
+++ b/apps/codecs/demac/libdemac/vector_math_generic.h
@@ -2,7 +2,7 @@
2 2
3libdemac - A Monkey's Audio decoder 3libdemac - A Monkey's Audio decoder
4 4
5$Id:$ 5$Id$
6 6
7Copyright (C) Dave Chapman 2007 7Copyright (C) Dave Chapman 2007
8 8
@@ -22,7 +22,9 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
22 22
23*/ 23*/
24 24
25static inline void vector_add(int16_t* v1, int16_t* v2) 25#include "demac_config.h"
26
27static inline void vector_add(filter_int* v1, filter_int* v2)
26{ 28{
27#if ORDER > 32 29#if ORDER > 32
28 int order = (ORDER >> 5); 30 int order = (ORDER >> 5);
@@ -66,7 +68,7 @@ static inline void vector_add(int16_t* v1, int16_t* v2)
66 } 68 }
67} 69}
68 70
69static inline void vector_sub(int16_t* v1, int16_t* v2) 71static inline void vector_sub(filter_int* v1, filter_int* v2)
70{ 72{
71#if ORDER > 32 73#if ORDER > 32
72 int order = (ORDER >> 5); 74 int order = (ORDER >> 5);
@@ -110,7 +112,7 @@ static inline void vector_sub(int16_t* v1, int16_t* v2)
110 } 112 }
111} 113}
112 114
113static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) 115static inline int32_t scalarproduct(filter_int* v1, filter_int* v2)
114{ 116{
115 int res = 0; 117 int res = 0;
116 118