diff options
Diffstat (limited to 'apps/codecs/demac')
-rw-r--r-- | apps/codecs/demac/libdemac/decoder.c | 13 | ||||
-rw-r--r-- | apps/codecs/demac/libdemac/demac_config.h | 27 | ||||
-rw-r--r-- | apps/codecs/demac/libdemac/filter.c | 41 | ||||
-rw-r--r-- | apps/codecs/demac/libdemac/filter.h | 12 | ||||
-rw-r--r-- | apps/codecs/demac/libdemac/vector_math16_arm7.h | 293 | ||||
-rw-r--r-- | apps/codecs/demac/libdemac/vector_math32_armv4.h | 210 | ||||
-rw-r--r-- | apps/codecs/demac/libdemac/vector_math_generic.h (renamed from apps/codecs/demac/libdemac/vector_math16.h) | 10 |
7 files changed, 280 insertions, 326 deletions
diff --git a/apps/codecs/demac/libdemac/decoder.c b/apps/codecs/demac/libdemac/decoder.c index 540db47636..31bcb28b72 100644 --- a/apps/codecs/demac/libdemac/decoder.c +++ b/apps/codecs/demac/libdemac/decoder.c | |||
@@ -33,15 +33,16 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | |||
33 | 33 | ||
34 | /* Statically allocate the filter buffers */ | 34 | /* Statically allocate the filter buffers */ |
35 | 35 | ||
36 | static int16_t filterbuf32[(32*3 + FILTER_HISTORY_SIZE) * 2] /* 2432 bytes */ | 36 | static filter_int filterbuf32[(32*3 + FILTER_HISTORY_SIZE) * 2] |
37 | IBSS_ATTR __attribute__((aligned(16))); | 37 | IBSS_ATTR __attribute__((aligned(16))); /* 2432/4864 bytes */ |
38 | static int16_t filterbuf256[(256*3 + FILTER_HISTORY_SIZE) * 2] /* 5120 bytes */ | 38 | static filter_int filterbuf256[(256*3 + FILTER_HISTORY_SIZE) * 2] |
39 | IBSS_ATTR __attribute__((aligned(16))); | 39 | IBSS_ATTR __attribute__((aligned(16))); /* 5120/10240 bytes */ |
40 | 40 | ||
41 | /* This is only needed for "insane" files, and no current Rockbox targets | 41 | /* This is only needed for "insane" files, and no current Rockbox targets |
42 | can hope to decode them in realtime, although the Gigabeat S comes close. */ | 42 | can hope to decode them in realtime, although the Gigabeat S comes close. */ |
43 | static int16_t filterbuf1280[(1280*3 + FILTER_HISTORY_SIZE) * 2] /* 17408 bytes */ | 43 | static filter_int filterbuf1280[(1280*3 + FILTER_HISTORY_SIZE) * 2] |
44 | IBSS_ATTR_DEMAC_INSANEBUF __attribute__((aligned(16))); | 44 | IBSS_ATTR_DEMAC_INSANEBUF __attribute__((aligned(16))); |
45 | /* 17408 or 34816 bytes */ | ||
45 | 46 | ||
46 | void init_frame_decoder(struct ape_ctx_t* ape_ctx, | 47 | void init_frame_decoder(struct ape_ctx_t* ape_ctx, |
47 | unsigned char* inbuffer, int* firstbyte, | 48 | unsigned char* inbuffer, int* firstbyte, |
diff --git a/apps/codecs/demac/libdemac/demac_config.h b/apps/codecs/demac/libdemac/demac_config.h index 93fda76e25..86c2d24919 100644 --- a/apps/codecs/demac/libdemac/demac_config.h +++ b/apps/codecs/demac/libdemac/demac_config.h | |||
@@ -39,12 +39,21 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | |||
39 | 39 | ||
40 | #define APE_OUTPUT_DEPTH 29 | 40 | #define APE_OUTPUT_DEPTH 29 |
41 | 41 | ||
42 | /* On PP5002 code should go into IRAM. Otherwise put the insane | 42 | /* On ARMv4, using 32 bit ints for the filters is faster. */ |
43 | * filter buffer into IRAM as long as there is no better use. */ | 43 | #if defined(CPU_ARM) && (ARM_ARCH == 4) |
44 | #define FILTER_BITS 32 | ||
45 | #endif | ||
46 | |||
44 | #if CONFIG_CPU == PP5002 | 47 | #if CONFIG_CPU == PP5002 |
48 | /* Code in IRAM for speed, not enough IRAM for the insane filter buffer. */ | ||
45 | #define ICODE_SECTION_DEMAC_ARM .icode | 49 | #define ICODE_SECTION_DEMAC_ARM .icode |
46 | #define ICODE_ATTR_DEMAC ICODE_ATTR | 50 | #define ICODE_ATTR_DEMAC ICODE_ATTR |
47 | #define IBSS_ATTR_DEMAC_INSANEBUF | 51 | #define IBSS_ATTR_DEMAC_INSANEBUF |
52 | #elif CONFIG_CPU == PP5020 | ||
53 | /* Not enough IRAM for the insane filter buffer. */ | ||
54 | #define ICODE_SECTION_DEMAC_ARM .text | ||
55 | #define ICODE_ATTR_DEMAC | ||
56 | #define IBSS_ATTR_DEMAC_INSANEBUF | ||
48 | #else | 57 | #else |
49 | #define ICODE_SECTION_DEMAC_ARM .text | 58 | #define ICODE_SECTION_DEMAC_ARM .text |
50 | #define ICODE_ATTR_DEMAC | 59 | #define ICODE_ATTR_DEMAC |
@@ -75,6 +84,20 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | |||
75 | 84 | ||
76 | #ifndef PREDICTOR_HISTORY_SIZE | 85 | #ifndef PREDICTOR_HISTORY_SIZE |
77 | #define PREDICTOR_HISTORY_SIZE 512 | 86 | #define PREDICTOR_HISTORY_SIZE 512 |
87 | #endif | ||
88 | |||
89 | #ifndef FILTER_BITS | ||
90 | #define FILTER_BITS 16 | ||
91 | #endif | ||
92 | |||
93 | |||
94 | #ifndef __ASSEMBLER__ | ||
95 | #include <inttypes.h> | ||
96 | #if FILTER_BITS == 32 | ||
97 | typedef int32_t filter_int; | ||
98 | #elif FILTER_BITS == 16 | ||
99 | typedef int16_t filter_int; | ||
100 | #endif | ||
78 | #endif | 101 | #endif |
79 | 102 | ||
80 | #endif /* _DEMAC_CONFIG_H */ | 103 | #endif /* _DEMAC_CONFIG_H */ |
diff --git a/apps/codecs/demac/libdemac/filter.c b/apps/codecs/demac/libdemac/filter.c index b47a37a041..5601fffcd4 100644 --- a/apps/codecs/demac/libdemac/filter.c +++ b/apps/codecs/demac/libdemac/filter.c | |||
@@ -28,27 +28,38 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | |||
28 | #include "demac.h" | 28 | #include "demac.h" |
29 | #include "filter.h" | 29 | #include "filter.h" |
30 | #include "demac_config.h" | 30 | #include "demac_config.h" |
31 | |||
32 | #if FILTER_BITS == 32 | ||
33 | |||
34 | #if defined(CPU_ARM) && (ARM_ARCH == 4) | ||
35 | #include "vector_math32_armv4.h" | ||
36 | #else | ||
37 | #include "vector_math_generic.h" | ||
38 | #endif | ||
39 | |||
40 | #else /* FILTER_BITS == 16 */ | ||
31 | 41 | ||
32 | #ifdef CPU_COLDFIRE | 42 | #ifdef CPU_COLDFIRE |
33 | #include "vector_math16_cf.h" | 43 | #include "vector_math16_cf.h" |
34 | #elif ARM_ARCH >= 6 | 44 | #elif defined(CPU_ARM) && (ARM_ARCH >= 6) |
35 | #include "vector_math16_armv6.h" | 45 | #include "vector_math16_armv6.h" |
36 | #elif ARM_ARCH >= 5 /* Assume all our ARMv5 targets are ARMv5te(j) */ | 46 | #elif defined(CPU_ARM) && (ARM_ARCH >= 5) |
47 | /* Assume all our ARMv5 targets are ARMv5te(j) */ | ||
37 | #include "vector_math16_armv5te.h" | 48 | #include "vector_math16_armv5te.h" |
38 | #elif defined CPU_ARM7TDMI | ||
39 | #include "vector_math16_arm7.h" | ||
40 | #else | 49 | #else |
41 | #include "vector_math16.h" | 50 | #include "vector_math_generic.h" |
42 | #endif | 51 | #endif |
43 | 52 | ||
53 | #endif /* FILTER_BITS */ | ||
54 | |||
44 | struct filter_t { | 55 | struct filter_t { |
45 | int16_t* coeffs; /* ORDER entries */ | 56 | filter_int* coeffs; /* ORDER entries */ |
46 | 57 | ||
47 | /* We store all the filter delays in a single buffer */ | 58 | /* We store all the filter delays in a single buffer */ |
48 | int16_t* history_end; | 59 | filter_int* history_end; |
49 | 60 | ||
50 | int16_t* delay; | 61 | filter_int* delay; |
51 | int16_t* adaptcoeffs; | 62 | filter_int* adaptcoeffs; |
52 | 63 | ||
53 | int avg; | 64 | int avg; |
54 | }; | 65 | }; |
@@ -89,7 +100,7 @@ struct filter_t { | |||
/* SATURATE(x): clamp a signed value to the int16_t range [-32768, 32767].
 *
 * Both variants expand to a single parenthesised expression with no trailing
 * semicolon, so the macro can be used inside larger expressions as well as
 * in a plain assignment statement.
 *
 * NOTE: the generic variant evaluates x more than once; do not pass an
 * argument with side effects. It also assumes x is a 32-bit signed int
 * (the shift by 31 extracts the sign). */
#if defined(CPU_ARM) && (ARM_ARCH >= 6)
/* Use the ssat instruction to saturate to 16 bits. */
#define SATURATE(x) ({int __res; asm("ssat %0, #16, %1" : "=r"(__res) : "r"(x)); __res; })
#else
/* In range: x is returned unchanged.
 * Out of range: (x >> 31) is 0 for positive and -1 for negative x
 * (arithmetic shift), so the XOR yields 0x7FFF or -0x8000 respectively. */
#define SATURATE(x) ((((x) == (int16_t)(x)) ? (x) : (((x) >> 31) ^ 0x7FFF)))
#endif
94 | 105 | ||
95 | /* Apply the filter with state f to count entries in data[] */ | 106 | /* Apply the filter with state f to count entries in data[] */ |
@@ -145,7 +156,7 @@ static void ICODE_ATTR_DEMAC do_apply_filter_3980(struct filter_t* f, | |||
145 | /* Have we filled the history buffer? */ | 156 | /* Have we filled the history buffer? */ |
146 | if (f->delay == f->history_end) { | 157 | if (f->delay == f->history_end) { |
147 | memmove(f->coeffs + ORDER, f->delay - (ORDER*2), | 158 | memmove(f->coeffs + ORDER, f->delay - (ORDER*2), |
148 | (ORDER*2) * sizeof(int16_t)); | 159 | (ORDER*2) * sizeof(filter_int)); |
149 | f->adaptcoeffs = f->coeffs + ORDER*2; | 160 | f->adaptcoeffs = f->coeffs + ORDER*2; |
150 | f->delay = f->coeffs + ORDER*3; | 161 | f->delay = f->coeffs + ORDER*3; |
151 | } | 162 | } |
@@ -190,7 +201,7 @@ static void ICODE_ATTR_DEMAC do_apply_filter_3970(struct filter_t* f, | |||
190 | /* Have we filled the history buffer? */ | 201 | /* Have we filled the history buffer? */ |
191 | if (f->delay == f->history_end) { | 202 | if (f->delay == f->history_end) { |
192 | memmove(f->coeffs + ORDER, f->delay - (ORDER*2), | 203 | memmove(f->coeffs + ORDER, f->delay - (ORDER*2), |
193 | (ORDER*2) * sizeof(int16_t)); | 204 | (ORDER*2) * sizeof(filter_int)); |
194 | f->adaptcoeffs = f->coeffs + ORDER*2; | 205 | f->adaptcoeffs = f->coeffs + ORDER*2; |
195 | f->delay = f->coeffs + ORDER*3; | 206 | f->delay = f->coeffs + ORDER*3; |
196 | } | 207 | } |
@@ -200,7 +211,7 @@ static void ICODE_ATTR_DEMAC do_apply_filter_3970(struct filter_t* f, | |||
200 | static struct filter_t filter0 IBSS_ATTR; | 211 | static struct filter_t filter0 IBSS_ATTR; |
201 | static struct filter_t filter1 IBSS_ATTR; | 212 | static struct filter_t filter1 IBSS_ATTR; |
202 | 213 | ||
203 | static void do_init_filter(struct filter_t* f, int16_t* buf) | 214 | static void do_init_filter(struct filter_t* f, filter_int* buf) |
204 | { | 215 | { |
205 | f->coeffs = buf; | 216 | f->coeffs = buf; |
206 | f->history_end = buf + ORDER*3 + FILTER_HISTORY_SIZE; | 217 | f->history_end = buf + ORDER*3 + FILTER_HISTORY_SIZE; |
@@ -210,13 +221,13 @@ static void do_init_filter(struct filter_t* f, int16_t* buf) | |||
210 | f->delay = f->coeffs + ORDER*3; | 221 | f->delay = f->coeffs + ORDER*3; |
211 | 222 | ||
212 | /* Zero coefficients and history buffer */ | 223 | /* Zero coefficients and history buffer */ |
213 | memset(f->coeffs, 0, ORDER*3 * sizeof(int16_t)); | 224 | memset(f->coeffs, 0, ORDER*3 * sizeof(filter_int)); |
214 | 225 | ||
215 | /* Zero the running average */ | 226 | /* Zero the running average */ |
216 | f->avg = 0; | 227 | f->avg = 0; |
217 | } | 228 | } |
218 | 229 | ||
219 | void INIT_FILTER(int16_t* buf) | 230 | void INIT_FILTER(filter_int* buf) |
220 | { | 231 | { |
221 | do_init_filter(&filter0, buf); | 232 | do_init_filter(&filter0, buf); |
222 | do_init_filter(&filter1, buf + ORDER*3 + FILTER_HISTORY_SIZE); | 233 | do_init_filter(&filter1, buf + ORDER*3 + FILTER_HISTORY_SIZE); |
diff --git a/apps/codecs/demac/libdemac/filter.h b/apps/codecs/demac/libdemac/filter.h index acbb155b29..bbe51d4572 100644 --- a/apps/codecs/demac/libdemac/filter.h +++ b/apps/codecs/demac/libdemac/filter.h | |||
@@ -25,21 +25,21 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | |||
25 | #ifndef _APE_FILTER_H | 25 | #ifndef _APE_FILTER_H |
26 | #define _APE_FILTER_H | 26 | #define _APE_FILTER_H |
27 | 27 | ||
28 | #include <inttypes.h> | 28 | #include "demac_config.h" |
29 | 29 | ||
30 | void init_filter_16_11(int16_t* buf); | 30 | void init_filter_16_11(filter_int* buf); |
31 | int apply_filter_16_11(int fileversion, int32_t* decoded0, int32_t* decoded1, int count); | 31 | int apply_filter_16_11(int fileversion, int32_t* decoded0, int32_t* decoded1, int count); |
32 | 32 | ||
33 | void init_filter_64_11(int16_t* buf); | 33 | void init_filter_64_11(filter_int* buf); |
34 | int apply_filter_64_11(int fileversion, int32_t* decoded0, int32_t* decoded1, int count); | 34 | int apply_filter_64_11(int fileversion, int32_t* decoded0, int32_t* decoded1, int count); |
35 | 35 | ||
36 | void init_filter_32_10(int16_t* buf); | 36 | void init_filter_32_10(filter_int* buf); |
37 | int apply_filter_32_10(int fileversion, int32_t* decoded0, int32_t* decoded1, int count); | 37 | int apply_filter_32_10(int fileversion, int32_t* decoded0, int32_t* decoded1, int count); |
38 | 38 | ||
39 | void init_filter_256_13(int16_t* buf); | 39 | void init_filter_256_13(filter_int* buf); |
40 | int apply_filter_256_13(int fileversion, int32_t* decoded0, int32_t* decoded1, int count); | 40 | int apply_filter_256_13(int fileversion, int32_t* decoded0, int32_t* decoded1, int count); |
41 | 41 | ||
42 | void init_filter_1280_15(int16_t* buf); | 42 | void init_filter_1280_15(filter_int* buf); |
43 | int apply_filter_1280_15(int fileversion, int32_t* decoded0, int32_t* decoded1, int count); | 43 | int apply_filter_1280_15(int fileversion, int32_t* decoded0, int32_t* decoded1, int count); |
44 | 44 | ||
45 | #endif | 45 | #endif |
diff --git a/apps/codecs/demac/libdemac/vector_math16_arm7.h b/apps/codecs/demac/libdemac/vector_math16_arm7.h deleted file mode 100644 index 653bb1f53f..0000000000 --- a/apps/codecs/demac/libdemac/vector_math16_arm7.h +++ /dev/null | |||
@@ -1,293 +0,0 @@ | |||
1 | /* | ||
2 | |||
3 | libdemac - A Monkey's Audio decoder | ||
4 | |||
5 | $Id$ | ||
6 | |||
7 | Copyright (C) Dave Chapman 2007 | ||
8 | |||
9 | ARM7 vector math copyright (C) 2007 Jens Arnold | ||
10 | |||
11 | This program is free software; you can redistribute it and/or modify | ||
12 | it under the terms of the GNU General Public License as published by | ||
13 | the Free Software Foundation; either version 2 of the License, or | ||
14 | (at your option) any later version. | ||
15 | |||
16 | This program is distributed in the hope that it will be useful, | ||
17 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
19 | GNU General Public License for more details. | ||
20 | |||
21 | You should have received a copy of the GNU General Public License | ||
22 | along with this program; if not, write to the Free Software | ||
23 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | ||
24 | |||
25 | */ | ||
26 | |||
27 | /* This version fetches data as 32 bit words, and *requires* v1 to be | ||
28 | * 32 bit aligned, otherwise it will result either in a data abort, or | ||
29 | * incorrect results (if ARM aligncheck is disabled). */ | ||
30 | static inline void vector_add(int16_t* v1, int16_t* v2) | ||
31 | { | ||
32 | #if ORDER > 16 | ||
33 | int cnt = ORDER>>4; | ||
34 | #endif | ||
35 | |||
36 | #define ADDHALFREGS(sum, s1) /* Adds register */ \ | ||
37 | "mov " #s1 ", " #s1 ", ror #16 \n" /* halves straight. */ \ | ||
38 | "add r8 , " #s1 ", " #sum ", lsl #16 \n" /* Clobbers 's1' */ \ | ||
39 | "add " #sum ", " #s1 ", " #sum ", lsr #16 \n" /* and r8. */ \ | ||
40 | "mov " #sum ", " #sum ", lsl #16 \n" \ | ||
41 | "orr " #sum ", " #sum ", r8 , lsr #16 \n" | ||
42 | |||
43 | #define ADDHALFXREGS(sum, s1, s2) /* Adds register */ \ | ||
44 | "add " #s1 ", " #s1 ", " #sum ", lsl #16 \n" /* halves across. */ \ | ||
45 | "add " #sum ", " #s2 ", " #sum ", lsr #16 \n" /* Clobbers 's1'. */ \ | ||
46 | "mov " #sum ", " #sum ", lsl #16 \n" \ | ||
47 | "orr " #sum ", " #sum ", " #s1 ", lsr #16 \n" | ||
48 | |||
49 | asm volatile ( | ||
50 | "tst %[v2], #2 \n" | ||
51 | "beq 20f \n" | ||
52 | |||
53 | "10: \n" | ||
54 | "ldrh r4, [%[v2]], #2 \n" | ||
55 | "mov r4, r4, lsl #16 \n" | ||
56 | "1: \n" | ||
57 | "ldmia %[v1], {r0-r3} \n" | ||
58 | "ldmia %[v2]!, {r5-r8} \n" | ||
59 | ADDHALFXREGS(r0, r4, r5) | ||
60 | ADDHALFXREGS(r1, r5, r6) | ||
61 | ADDHALFXREGS(r2, r6, r7) | ||
62 | ADDHALFXREGS(r3, r7, r8) | ||
63 | "stmia %[v1]!, {r0-r3} \n" | ||
64 | "mov r4, r8 \n" | ||
65 | "ldmia %[v1], {r0-r3} \n" | ||
66 | "ldmia %[v2]!, {r5-r8} \n" | ||
67 | ADDHALFXREGS(r0, r4, r5) | ||
68 | ADDHALFXREGS(r1, r5, r6) | ||
69 | ADDHALFXREGS(r2, r6, r7) | ||
70 | ADDHALFXREGS(r3, r7, r8) | ||
71 | "stmia %[v1]!, {r0-r3} \n" | ||
72 | #if ORDER > 16 | ||
73 | "mov r4, r8 \n" | ||
74 | "subs %[cnt], %[cnt], #1 \n" | ||
75 | "bne 1b \n" | ||
76 | #endif | ||
77 | "b 99f \n" | ||
78 | |||
79 | "20: \n" | ||
80 | "1: \n" | ||
81 | "ldmia %[v1], {r0-r3} \n" | ||
82 | "ldmia %[v2]!, {r4-r7} \n" | ||
83 | ADDHALFREGS(r0, r4) | ||
84 | ADDHALFREGS(r1, r5) | ||
85 | ADDHALFREGS(r2, r6) | ||
86 | ADDHALFREGS(r3, r7) | ||
87 | "stmia %[v1]!, {r0-r3} \n" | ||
88 | "ldmia %[v1], {r0-r3} \n" | ||
89 | "ldmia %[v2]!, {r4-r7} \n" | ||
90 | ADDHALFREGS(r0, r4) | ||
91 | ADDHALFREGS(r1, r5) | ||
92 | ADDHALFREGS(r2, r6) | ||
93 | ADDHALFREGS(r3, r7) | ||
94 | "stmia %[v1]!, {r0-r3} \n" | ||
95 | #if ORDER > 16 | ||
96 | "subs %[cnt], %[cnt], #1 \n" | ||
97 | "bne 1b \n" | ||
98 | #endif | ||
99 | |||
100 | "99: \n" | ||
101 | : /* outputs */ | ||
102 | #if ORDER > 16 | ||
103 | [cnt]"+r"(cnt), | ||
104 | #endif | ||
105 | [v1] "+r"(v1), | ||
106 | [v2] "+r"(v2) | ||
107 | : /* inputs */ | ||
108 | : /* clobbers */ | ||
109 | "r0", "r1", "r2", "r3", "r4", | ||
110 | "r5", "r6", "r7", "r8", "memory" | ||
111 | ); | ||
112 | } | ||
113 | |||
114 | /* This version fetches data as 32 bit words, and *requires* v1 to be | ||
115 | * 32 bit aligned, otherwise it will result either in a data abort, or | ||
116 | * incorrect results (if ARM aligncheck is disabled). */ | ||
117 | static inline void vector_sub(int16_t* v1, int16_t* v2) | ||
118 | { | ||
119 | #if ORDER > 16 | ||
120 | int cnt = ORDER>>4; | ||
121 | #endif | ||
122 | |||
123 | #define SUBHALFREGS(dif, s1) /* Subtracts register */ \ | ||
124 | "sub r8 , " #dif ", " #s1 "\n" /* halves straight. */ \ | ||
125 | "and r8 , r8 , r9 \n" /* Needs r9 = 0x0000ffff, */ \ | ||
126 | "mov " #dif ", " #dif ", lsr #16 \n" /* clobbers r8. */ \ | ||
127 | "sub " #dif ", " #dif ", " #s1 ", lsr #16 \n" \ | ||
128 | "orr " #dif ", r8 , " #dif ", lsl #16 \n" | ||
129 | |||
130 | #define SUBHALFXREGS(dif, s1, s2) /* Subtracts register */ \ | ||
131 | "sub " #s1 ", " #dif ", " #s1 ", lsr #16 \n" /* halves across. */ \ | ||
132 | "and " #s1 ", " #s1 ", r9 \n" /* Needs r9 = 0x0000ffff, */ \ | ||
133 | "rsb " #dif ", " #s2 ", " #dif ", lsr #16 \n" /* clobbers 's1'. */ \ | ||
134 | "orr " #dif ", " #s1 ", " #dif ", lsl #16 \n" | ||
135 | |||
136 | asm volatile ( | ||
137 | "mov r9, #0xff \n" | ||
138 | "orr r9, r9, #0xff00 \n" | ||
139 | "tst %[v2], #2 \n" | ||
140 | "beq 20f \n" | ||
141 | |||
142 | "10: \n" | ||
143 | "ldrh r4, [%[v2]], #2 \n" | ||
144 | "mov r4, r4, lsl #16 \n" | ||
145 | "1: \n" | ||
146 | "ldmia %[v1], {r0-r3} \n" | ||
147 | "ldmia %[v2]!, {r5-r8} \n" | ||
148 | SUBHALFXREGS(r0, r4, r5) | ||
149 | SUBHALFXREGS(r1, r5, r6) | ||
150 | SUBHALFXREGS(r2, r6, r7) | ||
151 | SUBHALFXREGS(r3, r7, r8) | ||
152 | "stmia %[v1]!, {r0-r3} \n" | ||
153 | "mov r4, r8 \n" | ||
154 | "ldmia %[v1], {r0-r3} \n" | ||
155 | "ldmia %[v2]!, {r5-r8} \n" | ||
156 | SUBHALFXREGS(r0, r4, r5) | ||
157 | SUBHALFXREGS(r1, r5, r6) | ||
158 | SUBHALFXREGS(r2, r6, r7) | ||
159 | SUBHALFXREGS(r3, r7, r8) | ||
160 | "stmia %[v1]!, {r0-r3} \n" | ||
161 | #if ORDER > 16 | ||
162 | "mov r4, r8 \n" | ||
163 | "subs %[cnt], %[cnt], #1 \n" | ||
164 | "bne 1b \n" | ||
165 | #endif | ||
166 | "b 99f \n" | ||
167 | |||
168 | "20: \n" | ||
169 | "1: \n" | ||
170 | "ldmia %[v1], {r0-r3} \n" | ||
171 | "ldmia %[v2]!, {r4-r7} \n" | ||
172 | SUBHALFREGS(r0, r4) | ||
173 | SUBHALFREGS(r1, r5) | ||
174 | SUBHALFREGS(r2, r6) | ||
175 | SUBHALFREGS(r3, r7) | ||
176 | "stmia %[v1]!, {r0-r3} \n" | ||
177 | "ldmia %[v1], {r0-r3} \n" | ||
178 | "ldmia %[v2]!, {r4-r7} \n" | ||
179 | SUBHALFREGS(r0, r4) | ||
180 | SUBHALFREGS(r1, r5) | ||
181 | SUBHALFREGS(r2, r6) | ||
182 | SUBHALFREGS(r3, r7) | ||
183 | "stmia %[v1]!, {r0-r3} \n" | ||
184 | #if ORDER > 16 | ||
185 | "subs %[cnt], %[cnt], #1 \n" | ||
186 | "bne 1b \n" | ||
187 | #endif | ||
188 | |||
189 | "99: \n" | ||
190 | : /* outputs */ | ||
191 | #if ORDER > 16 | ||
192 | [cnt]"+r"(cnt), | ||
193 | #endif | ||
194 | [v1] "+r"(v1), | ||
195 | [v2] "+r"(v2) | ||
196 | : /* inputs */ | ||
197 | : /* clobbers */ | ||
198 | "r0", "r1", "r2", "r3", "r4", "r5", | ||
199 | "r6", "r7", "r8", "r9", "memory" | ||
200 | ); | ||
201 | } | ||
202 | |||
203 | /* This version fetches data as 32 bit words, and *requires* v1 to be | ||
204 | * 32 bit aligned, otherwise it will result either in a data abort, or | ||
205 | * incorrect results (if ARM aligncheck is disabled). It is optimised | ||
206 | * for ARM7TDMI. Using it for ARM9 or higher results in worse performance | ||
207 | * than the C version. */ | ||
208 | static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) | ||
209 | { | ||
210 | int res = 0; | ||
211 | #if ORDER > 16 | ||
212 | int cnt = ORDER>>4; | ||
213 | #endif | ||
214 | |||
215 | #define MLABLOCK2(f1, f2) \ | ||
216 | "mov r8, " #f1 ", lsl #16 \n" \ | ||
217 | "mov r8, r8 , asr #16 \n" \ | ||
218 | "mov r9, " #f2 ", lsl #16 \n" \ | ||
219 | "mov r9, r9 , asr #16 \n" \ | ||
220 | "mla %[res], r9, r8, %[res] \n" \ | ||
221 | "mov r8, " #f1 ", asr #16 \n" \ | ||
222 | "mov r9, " #f2 ", asr #16 \n" \ | ||
223 | "mla %[res], r9, r8, %[res] \n" | ||
224 | |||
225 | #define MLABLOCK2_U2(f1, f2) \ | ||
226 | "mov r8, " #f1 ", lsl #16 \n" \ | ||
227 | "mov r8, r8 , asr #16 \n" \ | ||
228 | "mla %[res], r9, r8, %[res] \n" \ | ||
229 | "mov r8, " #f1 ", asr #16 \n" \ | ||
230 | "mov r9, " #f2 ", lsl #16 \n" \ | ||
231 | "mov r9, r9 , asr #16 \n" \ | ||
232 | "mla %[res], r9, r8, %[res] \n" \ | ||
233 | "mov r9, " #f2 ", asr #16 \n" | ||
234 | |||
235 | asm volatile ( | ||
236 | "tst %[v2], #2 \n" | ||
237 | "beq 20f \n" | ||
238 | |||
239 | "10: \n" | ||
240 | "ldrsh r9, [%[v2]], #2 \n" | ||
241 | "1: \n" | ||
242 | "ldmia %[v1]!, {r0-r3} \n" | ||
243 | "ldmia %[v2]!, {r4-r7} \n" | ||
244 | MLABLOCK2_U2(r0, r4) | ||
245 | MLABLOCK2_U2(r1, r5) | ||
246 | MLABLOCK2_U2(r2, r6) | ||
247 | MLABLOCK2_U2(r3, r7) | ||
248 | "ldmia %[v1]!, {r0-r3} \n" | ||
249 | "ldmia %[v2]!, {r4-r7} \n" | ||
250 | MLABLOCK2_U2(r0, r4) | ||
251 | MLABLOCK2_U2(r1, r5) | ||
252 | MLABLOCK2_U2(r2, r6) | ||
253 | MLABLOCK2_U2(r3, r7) | ||
254 | #if ORDER > 16 | ||
255 | "subs %[cnt], %[cnt], #1 \n" | ||
256 | "bne 1b \n" | ||
257 | #endif | ||
258 | "b 99f \n" | ||
259 | |||
260 | "20: \n" | ||
261 | "1: \n" | ||
262 | "ldmia %[v1]!, {r0-r3} \n" | ||
263 | "ldmia %[v2]!, {r4-r7} \n" | ||
264 | MLABLOCK2(r0, r4) | ||
265 | MLABLOCK2(r1, r5) | ||
266 | MLABLOCK2(r2, r6) | ||
267 | MLABLOCK2(r3, r7) | ||
268 | "ldmia %[v1]!, {r0-r3} \n" | ||
269 | "ldmia %[v2]!, {r4-r7} \n" | ||
270 | MLABLOCK2(r0, r4) | ||
271 | MLABLOCK2(r1, r5) | ||
272 | MLABLOCK2(r2, r6) | ||
273 | MLABLOCK2(r3, r7) | ||
274 | #if ORDER > 16 | ||
275 | "subs %[cnt], %[cnt], #1 \n" | ||
276 | "bne 1b \n" | ||
277 | #endif | ||
278 | |||
279 | "99: \n" | ||
280 | : /* outputs */ | ||
281 | #if ORDER > 16 | ||
282 | [cnt]"+r"(cnt), | ||
283 | #endif | ||
284 | [v1] "+r"(v1), | ||
285 | [v2] "+r"(v2), | ||
286 | [res]"+r"(res) | ||
287 | : /* inputs */ | ||
288 | : /* clobbers */ | ||
289 | "r0", "r1", "r2", "r3", "r4", | ||
290 | "r5", "r6", "r7", "r8", "r9" | ||
291 | ); | ||
292 | return res; | ||
293 | } | ||
diff --git a/apps/codecs/demac/libdemac/vector_math32_armv4.h b/apps/codecs/demac/libdemac/vector_math32_armv4.h new file mode 100644 index 0000000000..b729bd3a0a --- /dev/null +++ b/apps/codecs/demac/libdemac/vector_math32_armv4.h | |||
@@ -0,0 +1,210 @@ | |||
1 | /* | ||
2 | |||
3 | libdemac - A Monkey's Audio decoder | ||
4 | |||
5 | $Id$ | ||
6 | |||
7 | Copyright (C) Dave Chapman 2007 | ||
8 | |||
9 | ARMv4 vector math copyright (C) 2008 Jens Arnold | ||
10 | |||
11 | This program is free software; you can redistribute it and/or modify | ||
12 | it under the terms of the GNU General Public License as published by | ||
13 | the Free Software Foundation; either version 2 of the License, or | ||
14 | (at your option) any later version. | ||
15 | |||
16 | This program is distributed in the hope that it will be useful, | ||
17 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
19 | GNU General Public License for more details. | ||
20 | |||
21 | You should have received a copy of the GNU General Public License | ||
22 | along with this program; if not, write to the Free Software | ||
23 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | ||
24 | |||
25 | */ | ||
26 | |||
27 | static inline void vector_add(int32_t* v1, int32_t* v2) | ||
28 | { | ||
29 | #if ORDER > 32 | ||
30 | int cnt = ORDER>>5; | ||
31 | #endif | ||
32 | |||
33 | #define ADDBLOCK4 \ | ||
34 | "ldmia %[v1], {r0-r3} \n" \ | ||
35 | "ldmia %[v2]!, {r4-r7} \n" \ | ||
36 | "add r0, r0, r4 \n" \ | ||
37 | "add r1, r1, r5 \n" \ | ||
38 | "add r2, r2, r6 \n" \ | ||
39 | "add r3, r3, r7 \n" \ | ||
40 | "stmia %[v1]!, {r0-r3} \n" | ||
41 | |||
42 | asm volatile ( | ||
43 | "1: \n" | ||
44 | ADDBLOCK4 | ||
45 | ADDBLOCK4 | ||
46 | ADDBLOCK4 | ||
47 | ADDBLOCK4 | ||
48 | #if ORDER > 16 | ||
49 | ADDBLOCK4 | ||
50 | ADDBLOCK4 | ||
51 | ADDBLOCK4 | ||
52 | ADDBLOCK4 | ||
53 | #endif | ||
54 | #if ORDER > 32 | ||
55 | "subs %[cnt], %[cnt], #1 \n" | ||
56 | "bne 1b \n" | ||
57 | #endif | ||
58 | : /* outputs */ | ||
59 | #if ORDER > 32 | ||
60 | [cnt]"+r"(cnt), | ||
61 | #endif | ||
62 | [v1] "+r"(v1), | ||
63 | [v2] "+r"(v2) | ||
64 | : /* inputs */ | ||
65 | : /* clobbers */ | ||
66 | "r0", "r1", "r2", "r3", "r4", | ||
67 | "r5", "r6", "r7", "memory" | ||
68 | ); | ||
69 | } | ||
70 | |||
71 | static inline void vector_sub(int32_t* v1, int32_t* v2) | ||
72 | { | ||
73 | #if ORDER > 32 | ||
74 | int cnt = ORDER>>5; | ||
75 | #endif | ||
76 | |||
77 | #define SUBBLOCK4 \ | ||
78 | "ldmia %[v1], {r0-r3} \n" \ | ||
79 | "ldmia %[v2]!, {r4-r7} \n" \ | ||
80 | "sub r0, r0, r4 \n" \ | ||
81 | "sub r1, r1, r5 \n" \ | ||
82 | "sub r2, r2, r6 \n" \ | ||
83 | "sub r3, r3, r7 \n" \ | ||
84 | "stmia %[v1]!, {r0-r3} \n" | ||
85 | |||
86 | asm volatile ( | ||
87 | "1: \n" | ||
88 | SUBBLOCK4 | ||
89 | SUBBLOCK4 | ||
90 | SUBBLOCK4 | ||
91 | SUBBLOCK4 | ||
92 | #if ORDER > 16 | ||
93 | SUBBLOCK4 | ||
94 | SUBBLOCK4 | ||
95 | SUBBLOCK4 | ||
96 | SUBBLOCK4 | ||
97 | #endif | ||
98 | #if ORDER > 32 | ||
99 | "subs %[cnt], %[cnt], #1 \n" | ||
100 | "bne 1b \n" | ||
101 | #endif | ||
102 | : /* outputs */ | ||
103 | #if ORDER > 32 | ||
104 | [cnt]"+r"(cnt), | ||
105 | #endif | ||
106 | [v1] "+r"(v1), | ||
107 | [v2] "+r"(v2) | ||
108 | : /* inputs */ | ||
109 | : /* clobbers */ | ||
110 | "r0", "r1", "r2", "r3", "r4", | ||
111 | "r5", "r6", "r7", "memory" | ||
112 | ); | ||
113 | } | ||
114 | |||
115 | static inline int32_t scalarproduct(int32_t* v1, int32_t* v2) | ||
116 | { | ||
117 | int res = 0; | ||
118 | #if ORDER > 32 | ||
119 | int cnt = ORDER>>5; | ||
120 | #endif | ||
121 | |||
122 | asm volatile ( | ||
123 | #if ORDER > 16 | ||
124 | "ldmia %[v2]!, {r6-r7} \n" | ||
125 | "1: \n" | ||
126 | "ldmia %[v1]!, {r0,r1,r3-r5} \n" | ||
127 | "mla %[res], r6, r0, %[res] \n" | ||
128 | "mla %[res], r7, r1, %[res] \n" | ||
129 | "ldmia %[v2]!, {r0-r2,r6-r8} \n" | ||
130 | "mla %[res], r0, r3, %[res] \n" | ||
131 | "mla %[res], r1, r4, %[res] \n" | ||
132 | "mla %[res], r2, r5, %[res] \n" | ||
133 | "ldmia %[v1]!, {r0-r4} \n" | ||
134 | "mla %[res], r6, r0, %[res] \n" | ||
135 | "mla %[res], r7, r1, %[res] \n" | ||
136 | "mla %[res], r8, r2, %[res] \n" | ||
137 | "ldmia %[v2]!, {r0,r1,r6-r8} \n" | ||
138 | "mla %[res], r0, r3, %[res] \n" | ||
139 | "mla %[res], r1, r4, %[res] \n" | ||
140 | "ldmia %[v1]!, {r0-r5} \n" | ||
141 | "mla %[res], r6, r0, %[res] \n" | ||
142 | "mla %[res], r7, r1, %[res] \n" | ||
143 | "mla %[res], r8, r2, %[res] \n" | ||
144 | "ldmia %[v2]!, {r0-r2,r6,r7} \n" | ||
145 | "mla %[res], r0, r3, %[res] \n" | ||
146 | "mla %[res], r1, r4, %[res] \n" | ||
147 | "mla %[res], r2, r5, %[res] \n" | ||
148 | "ldmia %[v1]!, {r0,r1,r3-r5} \n" | ||
149 | "mla %[res], r6, r0, %[res] \n" | ||
150 | "mla %[res], r7, r1, %[res] \n" | ||
151 | "ldmia %[v2]!, {r0-r2,r6-r8} \n" | ||
152 | "mla %[res], r0, r3, %[res] \n" | ||
153 | "mla %[res], r1, r4, %[res] \n" | ||
154 | "mla %[res], r2, r5, %[res] \n" | ||
155 | "ldmia %[v1]!, {r0-r4} \n" | ||
156 | "mla %[res], r6, r0, %[res] \n" | ||
157 | "mla %[res], r7, r1, %[res] \n" | ||
158 | "mla %[res], r8, r2, %[res] \n" | ||
159 | "ldmia %[v2]!, {r0,r1,r6-r8} \n" | ||
160 | "mla %[res], r0, r3, %[res] \n" | ||
161 | "mla %[res], r1, r4, %[res] \n" | ||
162 | "ldmia %[v1]!, {r0-r5} \n" | ||
163 | "mla %[res], r6, r0, %[res] \n" | ||
164 | "mla %[res], r7, r1, %[res] \n" | ||
165 | "mla %[res], r8, r2, %[res] \n" | ||
166 | #if ORDER > 32 | ||
167 | "ldmia %[v2]!, {r0-r2,r6,r7} \n" | ||
168 | #else | ||
169 | "ldmia %[v2]!, {r0-r2} \n" | ||
170 | #endif | ||
171 | "mla %[res], r0, r3, %[res] \n" | ||
172 | "mla %[res], r1, r4, %[res] \n" | ||
173 | "mla %[res], r2, r5, %[res] \n" | ||
174 | #if ORDER > 32 | ||
175 | "subs %[cnt], %[cnt], #1 \n" | ||
176 | "bne 1b \n" | ||
177 | #endif | ||
178 | |||
179 | #else /* ORDER <= 16 */ | ||
180 | |||
181 | #define MLABLOCK4 \ | ||
182 | "ldmia %[v1]!, {r0-r3} \n" \ | ||
183 | "ldmia %[v2]!, {r4-r7} \n" \ | ||
184 | "mla %[res], r4, r0, %[res] \n" \ | ||
185 | "mla %[res], r5, r1, %[res] \n" \ | ||
186 | "mla %[res], r6, r2, %[res] \n" \ | ||
187 | "mla %[res], r7, r3, %[res] \n" | ||
188 | |||
189 | MLABLOCK4 | ||
190 | MLABLOCK4 | ||
191 | MLABLOCK4 | ||
192 | MLABLOCK4 | ||
193 | #endif /* ORDER <= 16 */ | ||
194 | : /* outputs */ | ||
195 | #if ORDER > 32 | ||
196 | [cnt]"+r"(cnt), | ||
197 | #endif | ||
198 | [v1] "+r"(v1), | ||
199 | [v2] "+r"(v2), | ||
200 | [res]"+r"(res) | ||
201 | : /* inputs */ | ||
202 | : /* clobbers */ | ||
203 | "r0", "r1", "r2", "r3", | ||
204 | "r4", "r5", "r6", "r7" | ||
205 | #if ORDER > 16 | ||
206 | ,"r8" | ||
207 | #endif | ||
208 | ); | ||
209 | return res; | ||
210 | } | ||
diff --git a/apps/codecs/demac/libdemac/vector_math16.h b/apps/codecs/demac/libdemac/vector_math_generic.h index 5d82abe930..7b61db77be 100644 --- a/apps/codecs/demac/libdemac/vector_math16.h +++ b/apps/codecs/demac/libdemac/vector_math_generic.h | |||
@@ -2,7 +2,7 @@ | |||
2 | 2 | ||
3 | libdemac - A Monkey's Audio decoder | 3 | libdemac - A Monkey's Audio decoder |
4 | 4 | ||
5 | $Id:$ | 5 | $Id$ |
6 | 6 | ||
7 | Copyright (C) Dave Chapman 2007 | 7 | Copyright (C) Dave Chapman 2007 |
8 | 8 | ||
@@ -22,7 +22,9 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | |||
22 | 22 | ||
23 | */ | 23 | */ |
24 | 24 | ||
25 | static inline void vector_add(int16_t* v1, int16_t* v2) | 25 | #include "demac_config.h" |
26 | |||
27 | static inline void vector_add(filter_int* v1, filter_int* v2) | ||
26 | { | 28 | { |
27 | #if ORDER > 32 | 29 | #if ORDER > 32 |
28 | int order = (ORDER >> 5); | 30 | int order = (ORDER >> 5); |
@@ -66,7 +68,7 @@ static inline void vector_add(int16_t* v1, int16_t* v2) | |||
66 | } | 68 | } |
67 | } | 69 | } |
68 | 70 | ||
69 | static inline void vector_sub(int16_t* v1, int16_t* v2) | 71 | static inline void vector_sub(filter_int* v1, filter_int* v2) |
70 | { | 72 | { |
71 | #if ORDER > 32 | 73 | #if ORDER > 32 |
72 | int order = (ORDER >> 5); | 74 | int order = (ORDER >> 5); |
@@ -110,7 +112,7 @@ static inline void vector_sub(int16_t* v1, int16_t* v2) | |||
110 | } | 112 | } |
111 | } | 113 | } |
112 | 114 | ||
113 | static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) | 115 | static inline int32_t scalarproduct(filter_int* v1, filter_int* v2) |
114 | { | 116 | { |
115 | int res = 0; | 117 | int res = 0; |
116 | 118 | ||