diff options
Diffstat (limited to 'lib/rbcodec/codecs/demac/libdemac')
30 files changed, 6196 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/demac/libdemac/SOURCES b/lib/rbcodec/codecs/demac/libdemac/SOURCES new file mode 100644 index 0000000000..018f35a73c --- /dev/null +++ b/lib/rbcodec/codecs/demac/libdemac/SOURCES | |||
@@ -0,0 +1,15 @@ | |||
1 | predictor.c | ||
2 | #ifdef CPU_ARM | ||
3 | predictor-arm.S | ||
4 | udiv32_arm.S | ||
5 | #elif defined CPU_COLDFIRE | ||
6 | predictor-cf.S | ||
7 | #endif | ||
8 | entropy.c | ||
9 | decoder.c | ||
10 | parser.c | ||
11 | filter_1280_15.c | ||
12 | filter_16_11.c | ||
13 | filter_256_13.c | ||
14 | filter_32_10.c | ||
15 | filter_64_11.c | ||
diff --git a/lib/rbcodec/codecs/demac/libdemac/crc.c b/lib/rbcodec/codecs/demac/libdemac/crc.c new file mode 100644 index 0000000000..fa3ea89d7e --- /dev/null +++ b/lib/rbcodec/codecs/demac/libdemac/crc.c | |||
@@ -0,0 +1,120 @@ | |||
1 | /* | ||
2 | |||
3 | libdemac - A Monkey's Audio decoder | ||
4 | |||
5 | $Id$ | ||
6 | |||
7 | Copyright (C) Dave Chapman 2007 | ||
8 | |||
9 | This program is free software; you can redistribute it and/or modify | ||
10 | it under the terms of the GNU General Public License as published by | ||
11 | the Free Software Foundation; either version 2 of the License, or | ||
12 | (at your option) any later version. | ||
13 | |||
14 | This program is distributed in the hope that it will be useful, | ||
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | GNU General Public License for more details. | ||
18 | |||
19 | You should have received a copy of the GNU General Public License | ||
20 | along with this program; if not, write to the Free Software | ||
21 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | ||
22 | |||
23 | */ | ||
24 | |||
25 | #include <inttypes.h> | ||
26 | #include "demac.h" | ||
27 | |||
/* Byte-wise CRC lookup table (256 entries).  ape_updatecrc() indexes it with
   the low byte of the running CRC XORed with the next input byte. */
static const uint32_t crctab32[256] =
{
    /* entries 0x00 - 0x3F */
    0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA,
    0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3,
    0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988,
    0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91,
    0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE,
    0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7,
    0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC,
    0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5,
    0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172,
    0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B,
    0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940,
    0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59,
    0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116,
    0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F,
    0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924,
    0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D,

    /* entries 0x40 - 0x7F */
    0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A,
    0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433,
    0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818,
    0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01,
    0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E,
    0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457,
    0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C,
    0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65,
    0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2,
    0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB,
    0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0,
    0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9,
    0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086,
    0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F,
    0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4,
    0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD,

    /* entries 0x80 - 0xBF */
    0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A,
    0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683,
    0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8,
    0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1,
    0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE,
    0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7,
    0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC,
    0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5,
    0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252,
    0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B,
    0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60,
    0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79,
    0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236,
    0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F,
    0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04,
    0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D,

    /* entries 0xC0 - 0xFF */
    0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A,
    0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713,
    0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38,
    0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21,
    0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E,
    0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777,
    0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C,
    0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45,
    0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2,
    0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB,
    0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0,
    0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9,
    0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6,
    0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF,
    0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94,
    0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D
};
98 | |||
/* Return the value a running CRC starts from: all 32 bits set. */
uint32_t ape_initcrc(void)
{
    const uint32_t seed = 0xffffffff;

    return seed;
}
103 | |||
104 | /* Update the CRC from a block of WAV-format audio data */ | ||
105 | uint32_t ape_updatecrc(unsigned char *block, int count, uint32_t crc) | ||
106 | { | ||
107 | while (count--) | ||
108 | crc = (crc >> 8) ^ crctab32[(crc & 0xff) ^ *block++]; | ||
109 | |||
110 | return crc; | ||
111 | } | ||
112 | |||
/* Finalise a running CRC: invert every bit, then drop the lowest bit by
   shifting right once. */
uint32_t ape_finishcrc(uint32_t crc)
{
    return (crc ^ 0xffffffff) >> 1;
}
120 | |||
diff --git a/lib/rbcodec/codecs/demac/libdemac/decoder.c b/lib/rbcodec/codecs/demac/libdemac/decoder.c new file mode 100644 index 0000000000..b0339a75d9 --- /dev/null +++ b/lib/rbcodec/codecs/demac/libdemac/decoder.c | |||
@@ -0,0 +1,216 @@ | |||
1 | /* | ||
2 | |||
3 | libdemac - A Monkey's Audio decoder | ||
4 | |||
5 | $Id$ | ||
6 | |||
7 | Copyright (C) Dave Chapman 2007 | ||
8 | |||
9 | This program is free software; you can redistribute it and/or modify | ||
10 | it under the terms of the GNU General Public License as published by | ||
11 | the Free Software Foundation; either version 2 of the License, or | ||
12 | (at your option) any later version. | ||
13 | |||
14 | This program is distributed in the hope that it will be useful, | ||
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | GNU General Public License for more details. | ||
18 | |||
19 | You should have received a copy of the GNU General Public License | ||
20 | along with this program; if not, write to the Free Software | ||
21 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | ||
22 | |||
23 | */ | ||
24 | |||
25 | #include <inttypes.h> | ||
26 | #include <string.h> | ||
27 | |||
28 | #include "demac.h" | ||
29 | #include "predictor.h" | ||
30 | #include "entropy.h" | ||
31 | #include "filter.h" | ||
32 | #include "demac_config.h" | ||
33 | |||
34 | /* Statically allocate the filter buffers */ | ||
35 | |||
36 | #ifdef FILTER256_IRAM | ||
37 | static filter_int filterbuf32[(32*3 + FILTER_HISTORY_SIZE) * 2] | ||
38 | IBSS_ATTR_DEMAC MEM_ALIGN_ATTR; | ||
39 | /* 2432 or 4864 bytes */ | ||
40 | static filter_int filterbuf256[(256*3 + FILTER_HISTORY_SIZE) * 2] | ||
41 | IBSS_ATTR_DEMAC MEM_ALIGN_ATTR; | ||
42 | /* 5120 or 10240 bytes */ | ||
43 | #define FILTERBUF64 filterbuf256 | ||
44 | #define FILTERBUF32 filterbuf32 | ||
45 | #define FILTERBUF16 filterbuf32 | ||
46 | #else | ||
47 | static filter_int filterbuf64[(64*3 + FILTER_HISTORY_SIZE) * 2] | ||
48 | IBSS_ATTR_DEMAC MEM_ALIGN_ATTR; | ||
49 | /* 2432 or 4864 bytes */ | ||
50 | static filter_int filterbuf256[(256*3 + FILTER_HISTORY_SIZE) * 2] | ||
51 | MEM_ALIGN_ATTR; /* 5120 or 10240 bytes */ | ||
52 | #define FILTERBUF64 filterbuf64 | ||
53 | #define FILTERBUF32 filterbuf64 | ||
54 | #define FILTERBUF16 filterbuf64 | ||
55 | #endif | ||
56 | |||
57 | /* This is only needed for "insane" files, and no current Rockbox targets | ||
58 | can hope to decode them in realtime, except the Gigabeat S (at 528MHz). */ | ||
59 | static filter_int filterbuf1280[(1280*3 + FILTER_HISTORY_SIZE) * 2] | ||
60 | IBSS_ATTR_DEMAC_INSANEBUF MEM_ALIGN_ATTR; | ||
61 | /* 17408 or 34816 bytes */ | ||
62 | |||
63 | void init_frame_decoder(struct ape_ctx_t* ape_ctx, | ||
64 | unsigned char* inbuffer, int* firstbyte, | ||
65 | int* bytesconsumed) | ||
66 | { | ||
67 | init_entropy_decoder(ape_ctx, inbuffer, firstbyte, bytesconsumed); | ||
68 | //printf("CRC=0x%08x\n",ape_ctx->CRC); | ||
69 | //printf("Flags=0x%08x\n",ape_ctx->frameflags); | ||
70 | |||
71 | init_predictor_decoder(&ape_ctx->predictor); | ||
72 | |||
73 | switch (ape_ctx->compressiontype) | ||
74 | { | ||
75 | case 2000: | ||
76 | init_filter_16_11(FILTERBUF16); | ||
77 | break; | ||
78 | |||
79 | case 3000: | ||
80 | init_filter_64_11(FILTERBUF64); | ||
81 | break; | ||
82 | |||
83 | case 4000: | ||
84 | init_filter_256_13(filterbuf256); | ||
85 | init_filter_32_10(FILTERBUF32); | ||
86 | break; | ||
87 | |||
88 | case 5000: | ||
89 | init_filter_1280_15(filterbuf1280); | ||
90 | init_filter_256_13(filterbuf256); | ||
91 | init_filter_16_11(FILTERBUF32); | ||
92 | } | ||
93 | } | ||
94 | |||
95 | int ICODE_ATTR_DEMAC decode_chunk(struct ape_ctx_t* ape_ctx, | ||
96 | unsigned char* inbuffer, int* firstbyte, | ||
97 | int* bytesconsumed, | ||
98 | int32_t* decoded0, int32_t* decoded1, | ||
99 | int count) | ||
100 | { | ||
101 | int32_t left, right; | ||
102 | #ifdef ROCKBOX | ||
103 | int scale = (APE_OUTPUT_DEPTH - ape_ctx->bps); | ||
104 | #define SCALE(x) ((x) << scale) | ||
105 | #else | ||
106 | #define SCALE(x) (x) | ||
107 | #endif | ||
108 | |||
109 | if ((ape_ctx->channels==1) || ((ape_ctx->frameflags | ||
110 | & (APE_FRAMECODE_PSEUDO_STEREO|APE_FRAMECODE_STEREO_SILENCE)) | ||
111 | == APE_FRAMECODE_PSEUDO_STEREO)) { | ||
112 | |||
113 | entropy_decode(ape_ctx, inbuffer, firstbyte, bytesconsumed, | ||
114 | decoded0, NULL, count); | ||
115 | |||
116 | if (ape_ctx->frameflags & APE_FRAMECODE_MONO_SILENCE) { | ||
117 | /* We are pure silence, so we're done. */ | ||
118 | return 0; | ||
119 | } | ||
120 | |||
121 | switch (ape_ctx->compressiontype) | ||
122 | { | ||
123 | case 2000: | ||
124 | apply_filter_16_11(ape_ctx->fileversion,0,decoded0,count); | ||
125 | break; | ||
126 | |||
127 | case 3000: | ||
128 | apply_filter_64_11(ape_ctx->fileversion,0,decoded0,count); | ||
129 | break; | ||
130 | |||
131 | case 4000: | ||
132 | apply_filter_32_10(ape_ctx->fileversion,0,decoded0,count); | ||
133 | apply_filter_256_13(ape_ctx->fileversion,0,decoded0,count); | ||
134 | break; | ||
135 | |||
136 | case 5000: | ||
137 | apply_filter_16_11(ape_ctx->fileversion,0,decoded0,count); | ||
138 | apply_filter_256_13(ape_ctx->fileversion,0,decoded0,count); | ||
139 | apply_filter_1280_15(ape_ctx->fileversion,0,decoded0,count); | ||
140 | } | ||
141 | |||
142 | /* Now apply the predictor decoding */ | ||
143 | predictor_decode_mono(&ape_ctx->predictor,decoded0,count); | ||
144 | |||
145 | if (ape_ctx->channels==2) { | ||
146 | /* Pseudo-stereo - copy left channel to right channel */ | ||
147 | while (count--) | ||
148 | { | ||
149 | left = *decoded0; | ||
150 | *(decoded1++) = *(decoded0++) = SCALE(left); | ||
151 | } | ||
152 | } | ||
153 | #ifdef ROCKBOX | ||
154 | else { | ||
155 | /* Scale to output depth */ | ||
156 | while (count--) | ||
157 | { | ||
158 | left = *decoded0; | ||
159 | *(decoded0++) = SCALE(left); | ||
160 | } | ||
161 | } | ||
162 | #endif | ||
163 | } else { /* Stereo */ | ||
164 | entropy_decode(ape_ctx, inbuffer, firstbyte, bytesconsumed, | ||
165 | decoded0, decoded1, count); | ||
166 | |||
167 | if ((ape_ctx->frameflags & APE_FRAMECODE_STEREO_SILENCE) | ||
168 | == APE_FRAMECODE_STEREO_SILENCE) { | ||
169 | /* We are pure silence, so we're done. */ | ||
170 | return 0; | ||
171 | } | ||
172 | |||
173 | /* Apply filters - compression type 1000 doesn't have any */ | ||
174 | switch (ape_ctx->compressiontype) | ||
175 | { | ||
176 | case 2000: | ||
177 | apply_filter_16_11(ape_ctx->fileversion,0,decoded0,count); | ||
178 | apply_filter_16_11(ape_ctx->fileversion,1,decoded1,count); | ||
179 | break; | ||
180 | |||
181 | case 3000: | ||
182 | apply_filter_64_11(ape_ctx->fileversion,0,decoded0,count); | ||
183 | apply_filter_64_11(ape_ctx->fileversion,1,decoded1,count); | ||
184 | break; | ||
185 | |||
186 | case 4000: | ||
187 | apply_filter_32_10(ape_ctx->fileversion,0,decoded0,count); | ||
188 | apply_filter_32_10(ape_ctx->fileversion,1,decoded1,count); | ||
189 | apply_filter_256_13(ape_ctx->fileversion,0,decoded0,count); | ||
190 | apply_filter_256_13(ape_ctx->fileversion,1,decoded1,count); | ||
191 | break; | ||
192 | |||
193 | case 5000: | ||
194 | apply_filter_16_11(ape_ctx->fileversion,0,decoded0,count); | ||
195 | apply_filter_16_11(ape_ctx->fileversion,1,decoded1,count); | ||
196 | apply_filter_256_13(ape_ctx->fileversion,0,decoded0,count); | ||
197 | apply_filter_256_13(ape_ctx->fileversion,1,decoded1,count); | ||
198 | apply_filter_1280_15(ape_ctx->fileversion,0,decoded0,count); | ||
199 | apply_filter_1280_15(ape_ctx->fileversion,1,decoded1,count); | ||
200 | } | ||
201 | |||
202 | /* Now apply the predictor decoding */ | ||
203 | predictor_decode_stereo(&ape_ctx->predictor,decoded0,decoded1,count); | ||
204 | |||
205 | /* Decorrelate and scale to output depth */ | ||
206 | while (count--) | ||
207 | { | ||
208 | left = *decoded1 - (*decoded0 / 2); | ||
209 | right = left + *decoded0; | ||
210 | |||
211 | *(decoded0++) = SCALE(left); | ||
212 | *(decoded1++) = SCALE(right); | ||
213 | } | ||
214 | } | ||
215 | return 0; | ||
216 | } | ||
diff --git a/lib/rbcodec/codecs/demac/libdemac/decoder.h b/lib/rbcodec/codecs/demac/libdemac/decoder.h new file mode 100644 index 0000000000..aeac569509 --- /dev/null +++ b/lib/rbcodec/codecs/demac/libdemac/decoder.h | |||
@@ -0,0 +1,40 @@ | |||
1 | /* | ||
2 | |||
3 | libdemac - A Monkey's Audio decoder | ||
4 | |||
5 | $Id$ | ||
6 | |||
7 | Copyright (C) Dave Chapman 2007 | ||
8 | |||
9 | This program is free software; you can redistribute it and/or modify | ||
10 | it under the terms of the GNU General Public License as published by | ||
11 | the Free Software Foundation; either version 2 of the License, or | ||
12 | (at your option) any later version. | ||
13 | |||
14 | This program is distributed in the hope that it will be useful, | ||
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | GNU General Public License for more details. | ||
18 | |||
19 | You should have received a copy of the GNU General Public License | ||
20 | along with this program; if not, write to the Free Software | ||
21 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | ||
22 | |||
23 | */ | ||
24 | |||
25 | #ifndef _APE_DECODER_H | ||
26 | #define _APE_DECODER_H | ||
27 | |||
28 | #include <inttypes.h> | ||
29 | #include "parser.h" | ||
30 | |||
31 | void init_frame_decoder(struct ape_ctx_t* ape_ctx, | ||
32 | unsigned char* inbuffer, int* firstbyte, | ||
33 | int* bytesconsumed); | ||
34 | |||
35 | int decode_chunk(struct ape_ctx_t* ape_ctx, | ||
36 | unsigned char* inbuffer, int* firstbyte, | ||
37 | int* bytesconsumed, | ||
38 | int32_t* decoded0, int32_t* decoded1, | ||
39 | int count); | ||
40 | #endif | ||
diff --git a/lib/rbcodec/codecs/demac/libdemac/demac.h b/lib/rbcodec/codecs/demac/libdemac/demac.h new file mode 100644 index 0000000000..696b2aba73 --- /dev/null +++ b/lib/rbcodec/codecs/demac/libdemac/demac.h | |||
@@ -0,0 +1,45 @@ | |||
1 | /* | ||
2 | |||
3 | libdemac - A Monkey's Audio decoder | ||
4 | |||
5 | $Id$ | ||
6 | |||
7 | Copyright (C) Dave Chapman 2007 | ||
8 | |||
9 | This program is free software; you can redistribute it and/or modify | ||
10 | it under the terms of the GNU General Public License as published by | ||
11 | the Free Software Foundation; either version 2 of the License, or | ||
12 | (at your option) any later version. | ||
13 | |||
14 | This program is distributed in the hope that it will be useful, | ||
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | GNU General Public License for more details. | ||
18 | |||
19 | You should have received a copy of the GNU General Public License | ||
20 | along with this program; if not, write to the Free Software | ||
21 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | ||
22 | |||
23 | */ | ||
24 | |||
25 | #ifndef _APE_DECODER_H | ||
26 | #define _APE_DECODER_H | ||
27 | |||
28 | #include <inttypes.h> | ||
29 | #include "parser.h" | ||
30 | |||
31 | void init_frame_decoder(struct ape_ctx_t* ape_ctx, | ||
32 | unsigned char* inbuffer, int* firstbyte, | ||
33 | int* bytesconsumed); | ||
34 | |||
35 | int decode_chunk(struct ape_ctx_t* ape_ctx, | ||
36 | unsigned char* inbuffer, int* firstbyte, | ||
37 | int* bytesconsumed, | ||
38 | int32_t* decoded0, int32_t* decoded1, | ||
39 | int count); | ||
40 | |||
41 | uint32_t ape_initcrc(void); | ||
42 | uint32_t ape_updatecrc(unsigned char *block, int count, uint32_t crc); | ||
43 | uint32_t ape_finishcrc(uint32_t crc); | ||
44 | |||
45 | #endif | ||
diff --git a/lib/rbcodec/codecs/demac/libdemac/demac_config.h b/lib/rbcodec/codecs/demac/libdemac/demac_config.h new file mode 100644 index 0000000000..fa4f008036 --- /dev/null +++ b/lib/rbcodec/codecs/demac/libdemac/demac_config.h | |||
@@ -0,0 +1,145 @@ | |||
1 | /* | ||
2 | |||
3 | libdemac - A Monkey's Audio decoder | ||
4 | |||
5 | $Id$ | ||
6 | |||
7 | Copyright (C) Dave Chapman 2007 | ||
8 | |||
9 | This program is free software; you can redistribute it and/or modify | ||
10 | it under the terms of the GNU General Public License as published by | ||
11 | the Free Software Foundation; either version 2 of the License, or | ||
12 | (at your option) any later version. | ||
13 | |||
14 | This program is distributed in the hope that it will be useful, | ||
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | GNU General Public License for more details. | ||
18 | |||
19 | You should have received a copy of the GNU General Public License | ||
20 | along with this program; if not, write to the Free Software | ||
21 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | ||
22 | |||
23 | */ | ||
24 | |||
25 | #ifndef _DEMAC_CONFIG_H | ||
26 | #define _DEMAC_CONFIG_H | ||
27 | |||
28 | /* Build-time choices for libdemac. | ||
29 | * Note that this file is included by both .c and .S files. */ | ||
30 | |||
31 | #ifdef ROCKBOX | ||
32 | |||
33 | #include "config.h" | ||
34 | |||
35 | #ifndef __ASSEMBLER__ | ||
36 | #include "codeclib.h" | ||
37 | #include <codecs.h> | ||
38 | #endif | ||
39 | |||
40 | #define APE_OUTPUT_DEPTH 29 | ||
41 | |||
42 | /* On ARMv4, using 32 bit ints for the filters is faster. */ | ||
43 | #if defined(CPU_ARM) && (ARM_ARCH == 4) | ||
44 | #define FILTER_BITS 32 | ||
45 | #endif | ||
46 | |||
47 | #if !defined(CPU_PP) && !defined(CPU_S5L870X) | ||
48 | #define FILTER256_IRAM | ||
49 | #endif | ||
50 | |||
51 | #if CONFIG_CPU == PP5002 || defined(CPU_S5L870X) | ||
52 | /* Code and data IRAM for speed (PP5002 has a broken cache), not enough IRAM | ||
53 | * for the insane filter buffer. Reciprocal table for division in IRAM. */ | ||
54 | #define ICODE_SECTION_DEMAC_ARM .icode | ||
55 | #define ICODE_ATTR_DEMAC ICODE_ATTR | ||
56 | #define ICONST_ATTR_DEMAC ICONST_ATTR | ||
57 | #define IBSS_ATTR_DEMAC IBSS_ATTR | ||
58 | #define IBSS_ATTR_DEMAC_INSANEBUF | ||
59 | |||
60 | #elif CONFIG_CPU == PP5020 | ||
61 | /* Code and small data in DRAM for speed (PP5020 IRAM isn't completely single | ||
62 | * cycle). Insane filter buffer not in IRAM in favour of reciprocal table for | ||
63 | * division. Decoded data buffers should be in IRAM (defined by the caller). */ | ||
64 | #define ICODE_SECTION_DEMAC_ARM .text | ||
65 | #define ICODE_ATTR_DEMAC | ||
66 | #define ICONST_ATTR_DEMAC | ||
67 | #define IBSS_ATTR_DEMAC | ||
68 | #define IBSS_ATTR_DEMAC_INSANEBUF | ||
69 | |||
70 | #elif CONFIG_CPU == PP5022 | ||
71 | /* Code in DRAM, data in IRAM. Insane filter buffer not in IRAM in favour of | ||
72 | * reciprocal table for division */ | ||
73 | #define ICODE_SECTION_DEMAC_ARM .text | ||
74 | #define ICODE_ATTR_DEMAC | ||
75 | #define ICONST_ATTR_DEMAC ICONST_ATTR | ||
76 | #define IBSS_ATTR_DEMAC IBSS_ATTR | ||
77 | #define IBSS_ATTR_DEMAC_INSANEBUF | ||
78 | |||
79 | #else | ||
80 | /* Code in DRAM, data in IRAM, including insane filter buffer. */ | ||
81 | #define ICODE_SECTION_DEMAC_ARM .text | ||
82 | #define ICODE_ATTR_DEMAC | ||
83 | #define ICONST_ATTR_DEMAC ICONST_ATTR | ||
84 | #define IBSS_ATTR_DEMAC IBSS_ATTR | ||
85 | #define IBSS_ATTR_DEMAC_INSANEBUF IBSS_ATTR | ||
86 | #endif | ||
87 | |||
88 | #else /* !ROCKBOX */ | ||
89 | |||
90 | #define APE_OUTPUT_DEPTH (ape_ctx->bps) | ||
91 | |||
92 | #define MEM_ALIGN_ATTR __attribute__((aligned(16))) | ||
93 | /* adjust to target architecture for best performance */ | ||
94 | |||
95 | #define ICODE_ATTR_DEMAC | ||
96 | #define ICONST_ATTR_DEMAC | ||
97 | #define IBSS_ATTR_DEMAC | ||
98 | #define IBSS_ATTR_DEMAC_INSANEBUF | ||
99 | |||
100 | /* Use to give gcc hints on which branch is most likely taken */ | ||
101 | #if defined(__GNUC__) && __GNUC__ >= 3 | ||
102 | #define LIKELY(x) __builtin_expect(!!(x), 1) | ||
103 | #define UNLIKELY(x) __builtin_expect(!!(x), 0) | ||
104 | #else | ||
105 | #define LIKELY(x) (x) | ||
106 | #define UNLIKELY(x) (x) | ||
107 | #endif | ||
108 | |||
109 | #endif /* !ROCKBOX */ | ||
110 | |||
111 | /* Defaults */ | ||
112 | |||
113 | #ifndef FILTER_HISTORY_SIZE | ||
114 | #define FILTER_HISTORY_SIZE 512 | ||
115 | #endif | ||
116 | |||
117 | #ifndef PREDICTOR_HISTORY_SIZE | ||
118 | #define PREDICTOR_HISTORY_SIZE 512 | ||
119 | #endif | ||
120 | |||
121 | #ifndef FILTER_BITS | ||
122 | #define FILTER_BITS 16 | ||
123 | #endif | ||
124 | |||
125 | |||
#ifndef __ASSEMBLER__

/* UDIV32(a, b): unsigned 32-bit division a / b. */
#if defined(CPU_ARM) && (ARM_ARCH < 5 || defined(USE_IRAM))
/* optimised unsigned integer division for ARMv4, in IRAM */
unsigned udiv32_arm(unsigned a, unsigned b);
#define UDIV32(a, b) udiv32_arm(a, b)
#else
/* default: plain C division.  Both arguments are parenthesised so that an
   expression argument such as "x + y" is divided as a whole. */
#define UDIV32(a, b) ((a) / (b))
#endif

/* filter_int: sample type used by the filters, selected by FILTER_BITS. */
#include <inttypes.h>
#if FILTER_BITS == 32
typedef int32_t filter_int;
#elif FILTER_BITS == 16
typedef int16_t filter_int;
#endif
#endif
144 | |||
145 | #endif /* _DEMAC_CONFIG_H */ | ||
diff --git a/lib/rbcodec/codecs/demac/libdemac/entropy.c b/lib/rbcodec/codecs/demac/libdemac/entropy.c new file mode 100644 index 0000000000..1cef979808 --- /dev/null +++ b/lib/rbcodec/codecs/demac/libdemac/entropy.c | |||
@@ -0,0 +1,464 @@ | |||
1 | /* | ||
2 | |||
3 | libdemac - A Monkey's Audio decoder | ||
4 | |||
5 | $Id$ | ||
6 | |||
7 | Copyright (C) Dave Chapman 2007 | ||
8 | |||
9 | This program is free software; you can redistribute it and/or modify | ||
10 | it under the terms of the GNU General Public License as published by | ||
11 | the Free Software Foundation; either version 2 of the License, or | ||
12 | (at your option) any later version. | ||
13 | |||
14 | This program is distributed in the hope that it will be useful, | ||
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | GNU General Public License for more details. | ||
18 | |||
19 | You should have received a copy of the GNU General Public License | ||
20 | along with this program; if not, write to the Free Software | ||
21 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | ||
22 | |||
23 | */ | ||
24 | |||
25 | #include <inttypes.h> | ||
26 | #include <string.h> | ||
27 | |||
28 | #include "parser.h" | ||
29 | #include "entropy.h" | ||
30 | #include "demac_config.h" | ||
31 | |||
32 | #define MODEL_ELEMENTS 64 | ||
33 | |||
34 | /* | ||
35 | The following counts arrays for use with the range decoder are | ||
36 | hard-coded in the Monkey's Audio decoder. | ||
37 | */ | ||
38 | |||
39 | static const int counts_3970[65] ICONST_ATTR_DEMAC = | ||
40 | { | ||
41 | 0,14824,28224,39348,47855,53994,58171,60926, | ||
42 | 62682,63786,64463,64878,65126,65276,65365,65419, | ||
43 | 65450,65469,65480,65487,65491,65493,65494,65495, | ||
44 | 65496,65497,65498,65499,65500,65501,65502,65503, | ||
45 | 65504,65505,65506,65507,65508,65509,65510,65511, | ||
46 | 65512,65513,65514,65515,65516,65517,65518,65519, | ||
47 | 65520,65521,65522,65523,65524,65525,65526,65527, | ||
48 | 65528,65529,65530,65531,65532,65533,65534,65535, | ||
49 | 65536 | ||
50 | }; | ||
51 | |||
52 | /* counts_diff_3970[i] = counts_3970[i+1] - counts_3970[i] */ | ||
53 | static const int counts_diff_3970[64] ICONST_ATTR_DEMAC = | ||
54 | { | ||
55 | 14824,13400,11124,8507,6139,4177,2755,1756, | ||
56 | 1104,677,415,248,150,89,54,31, | ||
57 | 19,11,7,4,2,1,1,1, | ||
58 | 1,1,1,1,1,1,1,1, | ||
59 | 1,1,1,1,1,1,1,1, | ||
60 | 1,1,1,1,1,1,1,1, | ||
61 | 1,1,1,1,1,1,1,1, | ||
62 | 1,1,1,1,1,1,1,1 | ||
63 | }; | ||
64 | |||
65 | static const int counts_3980[65] ICONST_ATTR_DEMAC = | ||
66 | { | ||
67 | 0,19578,36160,48417,56323,60899,63265,64435, | ||
68 | 64971,65232,65351,65416,65447,65466,65476,65482, | ||
69 | 65485,65488,65490,65491,65492,65493,65494,65495, | ||
70 | 65496,65497,65498,65499,65500,65501,65502,65503, | ||
71 | 65504,65505,65506,65507,65508,65509,65510,65511, | ||
72 | 65512,65513,65514,65515,65516,65517,65518,65519, | ||
73 | 65520,65521,65522,65523,65524,65525,65526,65527, | ||
74 | 65528,65529,65530,65531,65532,65533,65534,65535, | ||
75 | 65536 | ||
76 | }; | ||
77 | |||
78 | /* counts_diff_3980[i] = counts_3980[i+1] - counts_3980[i] */ | ||
79 | |||
80 | static const int counts_diff_3980[64] ICONST_ATTR_DEMAC = | ||
81 | { | ||
82 | 19578,16582,12257,7906,4576,2366,1170,536, | ||
83 | 261,119,65,31,19,10,6,3, | ||
84 | 3,2,1,1,1,1,1,1, | ||
85 | 1,1,1,1,1,1,1,1, | ||
86 | 1,1,1,1,1,1,1,1, | ||
87 | 1,1,1,1,1,1,1,1, | ||
88 | 1,1,1,1,1,1,1,1, | ||
89 | 1,1,1,1,1,1,1,1 | ||
90 | }; | ||
91 | |||
92 | /* | ||
93 | |||
94 | Range decoder adapted from rangecod.c included in: | ||
95 | |||
96 | http://www.compressconsult.com/rangecoder/rngcod13.zip | ||
97 | |||
98 | rangecod.c range encoding | ||
99 | |||
100 | (c) Michael Schindler | ||
101 | 1997, 1998, 1999, 2000 | ||
102 | http://www.compressconsult.com/ | ||
103 | michael@compressconsult.com | ||
104 | |||
105 | This program is free software; you can redistribute it and/or modify | ||
106 | it under the terms of the GNU General Public License as published by | ||
107 | the Free Software Foundation; either version 2 of the License, or | ||
108 | (at your option) any later version. | ||
109 | |||
110 | |||
111 | The encoding functions were removed, and functions turned into "static | ||
112 | inline" functions. Some minor cosmetic changes were made (e.g. turning | ||
113 | pre-processor symbols into upper-case, removing the rc parameter from | ||
114 | each function (and the RNGC macro)). | ||
115 | |||
116 | */ | ||
117 | |||
118 | /* BITSTREAM READING FUNCTIONS */ | ||
119 | |||
120 | /* We deal with the input data one byte at a time - to ensure | ||
121 | functionality on CPUs of any endianness regardless of any requirements | ||
122 | for aligned reads. | ||
123 | */ | ||
124 | |||
125 | static unsigned char* bytebuffer IBSS_ATTR_DEMAC; | ||
126 | static int bytebufferoffset IBSS_ATTR_DEMAC; | ||
127 | |||
128 | static inline void skip_byte(void) | ||
129 | { | ||
130 | bytebufferoffset--; | ||
131 | bytebuffer += bytebufferoffset & 4; | ||
132 | bytebufferoffset &= 3; | ||
133 | } | ||
134 | |||
135 | static inline int read_byte(void) | ||
136 | { | ||
137 | int ch = bytebuffer[bytebufferoffset]; | ||
138 | |||
139 | skip_byte(); | ||
140 | |||
141 | return ch; | ||
142 | } | ||
143 | |||
144 | /* RANGE DECODING FUNCTIONS */ | ||
145 | |||
146 | /* SIZE OF RANGE ENCODING CODE VALUES. */ | ||
147 | |||
#define CODE_BITS    32                                /* width of a code value */
#define TOP_VALUE    ((unsigned int)1 << (CODE_BITS-1)) /* highest bit of a code value */
#define SHIFT_BITS   (CODE_BITS - 9)
#define EXTRA_BITS   ((CODE_BITS-2) % 8 + 1)
#define BOTTOM_VALUE (TOP_VALUE >> 8)                  /* range is refilled at or below this */

/* Complete state of the range decoder. */
struct rangecoder_t
{
    uint32_t low;           /* low end of interval */
    uint32_t range;         /* length of interval */
    uint32_t help;          /* bytes_to_follow resp. intermediate value */
    unsigned int buffer;    /* buffer for input/output */
};
161 | |||
162 | static struct rangecoder_t rc IBSS_ATTR_DEMAC; | ||
163 | |||
164 | /* Start the decoder */ | ||
165 | static inline void range_start_decoding(void) | ||
166 | { | ||
167 | rc.buffer = read_byte(); | ||
168 | rc.low = rc.buffer >> (8 - EXTRA_BITS); | ||
169 | rc.range = (uint32_t) 1 << EXTRA_BITS; | ||
170 | } | ||
171 | |||
172 | static inline void range_dec_normalize(void) | ||
173 | { | ||
174 | while (rc.range <= BOTTOM_VALUE) | ||
175 | { | ||
176 | rc.buffer = (rc.buffer << 8) | read_byte(); | ||
177 | rc.low = (rc.low << 8) | ((rc.buffer >> 1) & 0xff); | ||
178 | rc.range <<= 8; | ||
179 | } | ||
180 | } | ||
181 | |||
182 | /* Calculate culmulative frequency for next symbol. Does NO update!*/ | ||
183 | /* tot_f is the total frequency */ | ||
184 | /* or: totf is (code_value)1<<shift */ | ||
185 | /* returns the culmulative frequency */ | ||
186 | static inline int range_decode_culfreq(int tot_f) | ||
187 | { | ||
188 | range_dec_normalize(); | ||
189 | rc.help = UDIV32(rc.range, tot_f); | ||
190 | return UDIV32(rc.low, rc.help); | ||
191 | } | ||
192 | |||
193 | static inline int range_decode_culshift(int shift) | ||
194 | { | ||
195 | range_dec_normalize(); | ||
196 | rc.help = rc.range >> shift; | ||
197 | return UDIV32(rc.low, rc.help); | ||
198 | } | ||
199 | |||
200 | |||
201 | /* Update decoding state */ | ||
202 | /* sy_f is the interval length (frequency of the symbol) */ | ||
203 | /* lt_f is the lower end (frequency sum of < symbols) */ | ||
204 | static inline void range_decode_update(int sy_f, int lt_f) | ||
205 | { | ||
206 | rc.low -= rc.help * lt_f; | ||
207 | rc.range = rc.help * sy_f; | ||
208 | } | ||
209 | |||
210 | |||
/* Decode 8 bits without modelling */
static inline unsigned char decode_byte(void)
{
    const int value = range_decode_culshift(8);

    range_decode_update(1, value);
    return value;
}
217 | |||
/* Decode 16 bits without modelling */
static inline unsigned short range_decode_short(void)
{
    const int value = range_decode_culshift(16);

    range_decode_update(1, value);
    return value;
}
223 | |||
/* Decode n bits (n <= 16) without modelling - based on range_decode_short */
static inline int range_decode_bits(int n)
{
    const int value = range_decode_culshift(n);

    range_decode_update(1, value);
    return value;
}
230 | |||
231 | |||
/* Finish decoding: one last normalisation so that all bytes belonging
   to the coded data are used up. */
static inline void range_done_decoding(void)
{
    range_dec_normalize();
}
236 | |||
237 | /* | ||
238 | range_get_symbol_* functions based on main decoding loop in simple_d.c from | ||
239 | http://www.compressconsult.com/rangecoder/rngcod13.zip | ||
240 | (c) Michael Schindler | ||
241 | */ | ||
242 | |||
243 | static inline int range_get_symbol_3980(void) | ||
244 | { | ||
245 | int symbol, cf; | ||
246 | |||
247 | cf = range_decode_culshift(16); | ||
248 | |||
249 | /* figure out the symbol inefficiently; a binary search would be much better */ | ||
250 | for (symbol = 0; counts_3980[symbol+1] <= cf; symbol++); | ||
251 | |||
252 | range_decode_update(counts_diff_3980[symbol],counts_3980[symbol]); | ||
253 | |||
254 | return symbol; | ||
255 | } | ||
256 | |||
257 | static inline int range_get_symbol_3970(void) | ||
258 | { | ||
259 | int symbol, cf; | ||
260 | |||
261 | cf = range_decode_culshift(16); | ||
262 | |||
263 | /* figure out the symbol inefficiently; a binary search would be much better */ | ||
264 | for (symbol = 0; counts_3970[symbol+1] <= cf; symbol++); | ||
265 | |||
266 | range_decode_update(counts_diff_3970[symbol],counts_3970[symbol]); | ||
267 | |||
268 | return symbol; | ||
269 | } | ||
270 | |||
271 | /* MAIN DECODING FUNCTIONS */ | ||
272 | |||
/* Adaptive state of one entropy-decoder channel. */
struct rice_t
{
    uint32_t k;     /* current parameter; stepped up/down by update_rice() */
    uint32_t ksum;  /* running sum that update_rice() compares against
                       thresholds derived from k */
};
278 | |||
279 | static struct rice_t riceX IBSS_ATTR_DEMAC; | ||
280 | static struct rice_t riceY IBSS_ATTR_DEMAC; | ||
281 | |||
282 | static inline void update_rice(struct rice_t* rice, int x) | ||
283 | { | ||
284 | rice->ksum += ((x + 1) / 2) - ((rice->ksum + 16) >> 5); | ||
285 | |||
286 | if (UNLIKELY(rice->k == 0)) { | ||
287 | rice->k = 1; | ||
288 | } else { | ||
289 | uint32_t lim = 1 << (rice->k + 4); | ||
290 | if (UNLIKELY(rice->ksum < lim)) { | ||
291 | rice->k--; | ||
292 | } else if (UNLIKELY(rice->ksum >= 2 * lim)) { | ||
293 | rice->k++; | ||
294 | } | ||
295 | } | ||
296 | } | ||
297 | |||
298 | static inline int entropy_decode3980(struct rice_t* rice) | ||
299 | { | ||
300 | int base, x, pivot, overflow; | ||
301 | |||
302 | pivot = rice->ksum >> 5; | ||
303 | if (UNLIKELY(pivot == 0)) | ||
304 | pivot=1; | ||
305 | |||
306 | overflow = range_get_symbol_3980(); | ||
307 | |||
308 | if (UNLIKELY(overflow == (MODEL_ELEMENTS-1))) { | ||
309 | overflow = range_decode_short() << 16; | ||
310 | overflow |= range_decode_short(); | ||
311 | } | ||
312 | |||
313 | if (pivot >= 0x10000) { | ||
314 | /* Codepath for 24-bit streams */ | ||
315 | int nbits, lo_bits, base_hi, base_lo; | ||
316 | |||
317 | /* Count the number of bits in pivot */ | ||
318 | nbits = 17; /* We know there must be at least 17 bits */ | ||
319 | while ((pivot >> nbits) > 0) { nbits++; } | ||
320 | |||
321 | /* base_lo is the low (nbits-16) bits of base | ||
322 | base_hi is the high 16 bits of base | ||
323 | */ | ||
324 | lo_bits = (nbits - 16); | ||
325 | |||
326 | base_hi = range_decode_culfreq((pivot >> lo_bits) + 1); | ||
327 | range_decode_update(1, base_hi); | ||
328 | |||
329 | base_lo = range_decode_culshift(lo_bits); | ||
330 | range_decode_update(1, base_lo); | ||
331 | |||
332 | base = (base_hi << lo_bits) + base_lo; | ||
333 | } else { | ||
334 | /* Codepath for 16-bit streams */ | ||
335 | base = range_decode_culfreq(pivot); | ||
336 | range_decode_update(1, base); | ||
337 | } | ||
338 | |||
339 | x = base + (overflow * pivot); | ||
340 | update_rice(rice, x); | ||
341 | |||
342 | /* Convert to signed */ | ||
343 | if (x & 1) | ||
344 | return (x >> 1) + 1; | ||
345 | else | ||
346 | return -(x >> 1); | ||
347 | } | ||
348 | |||
349 | |||
350 | static inline int entropy_decode3970(struct rice_t* rice) | ||
351 | { | ||
352 | int x, tmpk; | ||
353 | |||
354 | int overflow = range_get_symbol_3970(); | ||
355 | |||
356 | if (UNLIKELY(overflow == (MODEL_ELEMENTS - 1))) { | ||
357 | tmpk = range_decode_bits(5); | ||
358 | overflow = 0; | ||
359 | } else { | ||
360 | tmpk = (rice->k < 1) ? 0 : rice->k - 1; | ||
361 | } | ||
362 | |||
363 | if (tmpk <= 16) { | ||
364 | x = range_decode_bits(tmpk); | ||
365 | } else { | ||
366 | x = range_decode_short(); | ||
367 | x |= (range_decode_bits(tmpk - 16) << 16); | ||
368 | } | ||
369 | x += (overflow << tmpk); | ||
370 | |||
371 | update_rice(rice, x); | ||
372 | |||
373 | /* Convert to signed */ | ||
374 | if (x & 1) | ||
375 | return (x >> 1) + 1; | ||
376 | else | ||
377 | return -(x >> 1); | ||
378 | } | ||
379 | |||
380 | void init_entropy_decoder(struct ape_ctx_t* ape_ctx, | ||
381 | unsigned char* inbuffer, int* firstbyte, | ||
382 | int* bytesconsumed) | ||
383 | { | ||
384 | bytebuffer = inbuffer; | ||
385 | bytebufferoffset = *firstbyte; | ||
386 | |||
387 | /* Read the CRC */ | ||
388 | ape_ctx->CRC = read_byte(); | ||
389 | ape_ctx->CRC = (ape_ctx->CRC << 8) | read_byte(); | ||
390 | ape_ctx->CRC = (ape_ctx->CRC << 8) | read_byte(); | ||
391 | ape_ctx->CRC = (ape_ctx->CRC << 8) | read_byte(); | ||
392 | |||
393 | /* Read the frame flags if they exist */ | ||
394 | ape_ctx->frameflags = 0; | ||
395 | if ((ape_ctx->fileversion > 3820) && (ape_ctx->CRC & 0x80000000)) { | ||
396 | ape_ctx->CRC &= ~0x80000000; | ||
397 | |||
398 | ape_ctx->frameflags = read_byte(); | ||
399 | ape_ctx->frameflags = (ape_ctx->frameflags << 8) | read_byte(); | ||
400 | ape_ctx->frameflags = (ape_ctx->frameflags << 8) | read_byte(); | ||
401 | ape_ctx->frameflags = (ape_ctx->frameflags << 8) | read_byte(); | ||
402 | } | ||
403 | /* Keep a count of the blocks decoded in this frame */ | ||
404 | ape_ctx->blocksdecoded = 0; | ||
405 | |||
406 | /* Initialise the rice structs */ | ||
407 | riceX.k = 10; | ||
408 | riceX.ksum = (1 << riceX.k) * 16; | ||
409 | riceY.k = 10; | ||
410 | riceY.ksum = (1 << riceY.k) * 16; | ||
411 | |||
412 | /* The first 8 bits of input are ignored. */ | ||
413 | skip_byte(); | ||
414 | |||
415 | range_start_decoding(); | ||
416 | |||
417 | /* Return the new state of the buffer */ | ||
418 | *bytesconsumed = (intptr_t)bytebuffer - (intptr_t)inbuffer; | ||
419 | *firstbyte = bytebufferoffset; | ||
420 | } | ||
421 | |||
422 | void ICODE_ATTR_DEMAC entropy_decode(struct ape_ctx_t* ape_ctx, | ||
423 | unsigned char* inbuffer, int* firstbyte, | ||
424 | int* bytesconsumed, | ||
425 | int32_t* decoded0, int32_t* decoded1, | ||
426 | int blockstodecode) | ||
427 | { | ||
428 | bytebuffer = inbuffer; | ||
429 | bytebufferoffset = *firstbyte; | ||
430 | |||
431 | ape_ctx->blocksdecoded += blockstodecode; | ||
432 | |||
433 | if ((ape_ctx->frameflags & APE_FRAMECODE_LEFT_SILENCE) | ||
434 | && ((ape_ctx->frameflags & APE_FRAMECODE_RIGHT_SILENCE) | ||
435 | || (decoded1 == NULL))) { | ||
436 | /* We are pure silence, just memset the output buffer. */ | ||
437 | memset(decoded0, 0, blockstodecode * sizeof(int32_t)); | ||
438 | if (decoded1 != NULL) | ||
439 | memset(decoded1, 0, blockstodecode * sizeof(int32_t)); | ||
440 | } else { | ||
441 | if (ape_ctx->fileversion > 3970) { | ||
442 | while (LIKELY(blockstodecode--)) { | ||
443 | *(decoded0++) = entropy_decode3980(&riceY); | ||
444 | if (decoded1 != NULL) | ||
445 | *(decoded1++) = entropy_decode3980(&riceX); | ||
446 | } | ||
447 | } else { | ||
448 | while (LIKELY(blockstodecode--)) { | ||
449 | *(decoded0++) = entropy_decode3970(&riceY); | ||
450 | if (decoded1 != NULL) | ||
451 | *(decoded1++) = entropy_decode3970(&riceX); | ||
452 | } | ||
453 | } | ||
454 | } | ||
455 | |||
456 | if (ape_ctx->blocksdecoded == ape_ctx->currentframeblocks) | ||
457 | { | ||
458 | range_done_decoding(); | ||
459 | } | ||
460 | |||
461 | /* Return the new state of the buffer */ | ||
462 | *bytesconsumed = bytebuffer - inbuffer; | ||
463 | *firstbyte = bytebufferoffset; | ||
464 | } | ||
diff --git a/lib/rbcodec/codecs/demac/libdemac/entropy.h b/lib/rbcodec/codecs/demac/libdemac/entropy.h new file mode 100644 index 0000000000..fac2a44d99 --- /dev/null +++ b/lib/rbcodec/codecs/demac/libdemac/entropy.h | |||
@@ -0,0 +1,40 @@ | |||
1 | /* | ||
2 | |||
3 | libdemac - A Monkey's Audio decoder | ||
4 | |||
5 | $Id$ | ||
6 | |||
7 | Copyright (C) Dave Chapman 2007 | ||
8 | |||
9 | This program is free software; you can redistribute it and/or modify | ||
10 | it under the terms of the GNU General Public License as published by | ||
11 | the Free Software Foundation; either version 2 of the License, or | ||
12 | (at your option) any later version. | ||
13 | |||
14 | This program is distributed in the hope that it will be useful, | ||
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | GNU General Public License for more details. | ||
18 | |||
19 | You should have received a copy of the GNU General Public License | ||
20 | along with this program; if not, write to the Free Software | ||
21 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | ||
22 | |||
23 | */ | ||
24 | |||
25 | #ifndef _APE_ENTROPY_H | ||
26 | #define _APE_ENTROPY_H | ||
27 | |||
28 | #include <inttypes.h> | ||
29 | |||
/* Forward declaration so that this header can be included on its own:
   without it, a struct tag first seen inside a parameter list only has
   prototype scope and would not match the real struct ape_ctx_t. */
struct ape_ctx_t;

/* Set up the entropy decoder at the start of a frame.  Reads the frame
   header from inbuffer (starting at byte offset *firstbyte), stores the
   results in ape_ctx and reports the new buffer position through
   *firstbyte and *bytesconsumed. */
void init_entropy_decoder(struct ape_ctx_t* ape_ctx,
                          unsigned char* inbuffer, int* firstbyte,
                          int* bytesconsumed);

/* Decode blockstodecode values into decoded0 and, unless it is NULL,
   decoded1.  The buffer position is passed in and out the same way as
   for init_entropy_decoder(). */
void entropy_decode(struct ape_ctx_t* ape_ctx,
                    unsigned char* inbuffer, int* firstbyte,
                    int* bytesconsumed,
                    int32_t* decoded0, int32_t* decoded1,
                    int blockstodecode);
39 | |||
40 | #endif | ||
diff --git a/lib/rbcodec/codecs/demac/libdemac/filter.c b/lib/rbcodec/codecs/demac/libdemac/filter.c new file mode 100644 index 0000000000..903885cf00 --- /dev/null +++ b/lib/rbcodec/codecs/demac/libdemac/filter.c | |||
@@ -0,0 +1,296 @@ | |||
1 | /* | ||
2 | |||
3 | libdemac - A Monkey's Audio decoder | ||
4 | |||
5 | $Id$ | ||
6 | |||
7 | Copyright (C) Dave Chapman 2007 | ||
8 | |||
9 | This program is free software; you can redistribute it and/or modify | ||
10 | it under the terms of the GNU General Public License as published by | ||
11 | the Free Software Foundation; either version 2 of the License, or | ||
12 | (at your option) any later version. | ||
13 | |||
14 | This program is distributed in the hope that it will be useful, | ||
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | GNU General Public License for more details. | ||
18 | |||
19 | You should have received a copy of the GNU General Public License | ||
20 | along with this program; if not, write to the Free Software | ||
21 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | ||
22 | |||
23 | */ | ||
24 | |||
25 | #include <string.h> | ||
26 | #include <inttypes.h> | ||
27 | |||
28 | #include "demac.h" | ||
29 | #include "filter.h" | ||
30 | #include "demac_config.h" | ||
31 | |||
32 | #if FILTER_BITS == 32 | ||
33 | |||
34 | #if defined(CPU_ARM) && (ARM_ARCH == 4) | ||
35 | #include "vector_math32_armv4.h" | ||
36 | #else | ||
37 | #include "vector_math_generic.h" | ||
38 | #endif | ||
39 | |||
40 | #else /* FILTER_BITS == 16 */ | ||
41 | |||
42 | #ifdef CPU_COLDFIRE | ||
43 | #include "vector_math16_cf.h" | ||
44 | #elif defined(CPU_ARM) && (ARM_ARCH >= 7) | ||
45 | #include "vector_math16_armv7.h" | ||
46 | #elif defined(CPU_ARM) && (ARM_ARCH >= 6) | ||
47 | #include "vector_math16_armv6.h" | ||
48 | #elif defined(CPU_ARM) && (ARM_ARCH >= 5) | ||
49 | /* Assume all our ARMv5 targets are ARMv5te(j) */ | ||
50 | #include "vector_math16_armv5te.h" | ||
51 | #elif (defined(__i386__) || defined(__i486__)) && defined(__MMX__) \ | ||
52 | || defined(__x86_64__) | ||
53 | #include "vector_math16_mmx.h" | ||
54 | #else | ||
55 | #include "vector_math_generic.h" | ||
56 | #endif | ||
57 | |||
58 | #endif /* FILTER_BITS */ | ||
59 | |||
60 | struct filter_t { | ||
61 | filter_int* coeffs; /* ORDER entries */ | ||
62 | |||
63 | /* We store all the filter delays in a single buffer */ | ||
64 | filter_int* history_end; | ||
65 | |||
66 | filter_int* delay; | ||
67 | filter_int* adaptcoeffs; | ||
68 | |||
69 | int avg; | ||
70 | }; | ||
71 | |||
72 | /* We name the functions according to the ORDER and FRACBITS | ||
73 | pre-processor symbols and build multiple .o files from this .c file | ||
74 | - this increases code-size but gives the compiler more scope for | ||
75 | optimising the individual functions, as well as replacing a lot of | ||
76 | variables with constants. | ||
77 | */ | ||
78 | |||
/* Select the public function names for this ORDER/FRACBITS build.
   ORDER only needs checking for FRACBITS == 11, which is shared by two
   filters; any other FRACBITS == 11 combination defines nothing. */
#if (FRACBITS == 11) && (ORDER == 16)
  #define INIT_FILTER  init_filter_16_11
  #define APPLY_FILTER apply_filter_16_11
#elif (FRACBITS == 11) && (ORDER == 64)
  #define INIT_FILTER  init_filter_64_11
  #define APPLY_FILTER apply_filter_64_11
#elif FRACBITS == 13
  #define INIT_FILTER  init_filter_256_13
  #define APPLY_FILTER apply_filter_256_13
#elif FRACBITS == 10
  #define INIT_FILTER  init_filter_32_10
  #define APPLY_FILTER apply_filter_32_10
#elif FRACBITS == 15
  #define INIT_FILTER  init_filter_1280_15
  #define APPLY_FILTER apply_filter_1280_15
#endif
97 | |||
98 | /* Some macros to handle the fixed-point stuff */ | ||
99 | |||
/* Convert from (32-FRACBITS).FRACBITS fixed-point format to an
   integer (rounding to nearest).  The macro argument is parenthesised
   so that expressions with low-precedence operators (e.g. a | b) are
   rounded as a whole. */
#define FP_HALF  (1 << (FRACBITS - 1))   /* 0.5 in fixed-point format. */
#define FP_TO_INT(x) (((x) + FP_HALF) >> FRACBITS)  /* round(x) */
104 | |||
/* SATURATE(x): clamp x to the signed 16-bit range. */
#ifdef CPU_ARM
#if ARM_ARCH >= 6
/* ARMv6 and later: a single ssat instruction does the job. */
#define SATURATE(x) ({ \
    int __res; \
    asm("ssat %0, #16, %1" : "=r"(__res) : "r"(x)); \
    __res; \
})
#else /* ARM_ARCH < 6 */
/* Keeping the asr #31 outside of the asm allows loads to be scheduled between
   it and the rest of the block on ARM9E, with the load's result latency filled
   by the other calculations.
   __res starts as the sign mask of x; if it equals x >> 15 the value already
   fits and is copied, otherwise the mask is turned into 0x7fff / ~0x7fff. */
#define SATURATE(x) ({ \
    int __res = (x) >> 31; \
    asm volatile ( \
        "teq   %0, %1, asr #15\n\t" \
        "moveq %0, %1\n\t" \
        "eorne %0, %0, #0xff\n\t" \
        "eorne %0, %0, #0x7f00" \
        : "+r" (__res) : "r" (x) : "cc" \
    ); \
    __res; \
})
#endif /* ARM_ARCH */
#else /* CPU_ARM */
/* Generic C: pass x through when it survives a round trip via int16_t,
   otherwise produce 0x7FFF or its complement depending on the sign. */
#define SATURATE(x) \
    (LIKELY((x) == (int16_t)(x)) ? (x) : (((x) >> 31) ^ 0x7FFF))
#endif
127 | |||
128 | /* Apply the filter with state f to count entries in data[] */ | ||
129 | |||
130 | static void ICODE_ATTR_DEMAC do_apply_filter_3980(struct filter_t* f, | ||
131 | int32_t* data, int count) | ||
132 | { | ||
133 | int res; | ||
134 | int absres; | ||
135 | |||
136 | #ifdef PREPARE_SCALARPRODUCT | ||
137 | PREPARE_SCALARPRODUCT | ||
138 | #endif | ||
139 | |||
140 | while(LIKELY(count--)) | ||
141 | { | ||
142 | #ifdef FUSED_VECTOR_MATH | ||
143 | if (LIKELY(*data != 0)) { | ||
144 | if (*data < 0) | ||
145 | res = vector_sp_add(f->coeffs, f->delay - ORDER, | ||
146 | f->adaptcoeffs - ORDER); | ||
147 | else | ||
148 | res = vector_sp_sub(f->coeffs, f->delay - ORDER, | ||
149 | f->adaptcoeffs - ORDER); | ||
150 | } else { | ||
151 | res = scalarproduct(f->coeffs, f->delay - ORDER); | ||
152 | } | ||
153 | res = FP_TO_INT(res); | ||
154 | #else | ||
155 | res = FP_TO_INT(scalarproduct(f->coeffs, f->delay - ORDER)); | ||
156 | |||
157 | if (LIKELY(*data != 0)) { | ||
158 | if (*data < 0) | ||
159 | vector_add(f->coeffs, f->adaptcoeffs - ORDER); | ||
160 | else | ||
161 | vector_sub(f->coeffs, f->adaptcoeffs - ORDER); | ||
162 | } | ||
163 | #endif | ||
164 | |||
165 | res += *data; | ||
166 | |||
167 | *data++ = res; | ||
168 | |||
169 | /* Update the output history */ | ||
170 | *f->delay++ = SATURATE(res); | ||
171 | |||
172 | /* Version 3.98 and later files */ | ||
173 | |||
174 | /* Update the adaption coefficients */ | ||
175 | absres = (res < 0 ? -res : res); | ||
176 | |||
177 | if (UNLIKELY(absres > 3 * f->avg)) | ||
178 | *f->adaptcoeffs = ((res >> 25) & 64) - 32; | ||
179 | else if (3 * absres > 4 * f->avg) | ||
180 | *f->adaptcoeffs = ((res >> 26) & 32) - 16; | ||
181 | else if (LIKELY(absres > 0)) | ||
182 | *f->adaptcoeffs = ((res >> 27) & 16) - 8; | ||
183 | else | ||
184 | *f->adaptcoeffs = 0; | ||
185 | |||
186 | f->avg += (absres - f->avg) / 16; | ||
187 | |||
188 | f->adaptcoeffs[-1] >>= 1; | ||
189 | f->adaptcoeffs[-2] >>= 1; | ||
190 | f->adaptcoeffs[-8] >>= 1; | ||
191 | |||
192 | f->adaptcoeffs++; | ||
193 | |||
194 | /* Have we filled the history buffer? */ | ||
195 | if (UNLIKELY(f->delay == f->history_end)) { | ||
196 | memmove(f->coeffs + ORDER, f->delay - (ORDER*2), | ||
197 | (ORDER*2) * sizeof(filter_int)); | ||
198 | f->adaptcoeffs = f->coeffs + ORDER*2; | ||
199 | f->delay = f->coeffs + ORDER*3; | ||
200 | } | ||
201 | } | ||
202 | } | ||
203 | |||
204 | static void ICODE_ATTR_DEMAC do_apply_filter_3970(struct filter_t* f, | ||
205 | int32_t* data, int count) | ||
206 | { | ||
207 | int res; | ||
208 | |||
209 | #ifdef PREPARE_SCALARPRODUCT | ||
210 | PREPARE_SCALARPRODUCT | ||
211 | #endif | ||
212 | |||
213 | while(LIKELY(count--)) | ||
214 | { | ||
215 | #ifdef FUSED_VECTOR_MATH | ||
216 | if (LIKELY(*data != 0)) { | ||
217 | if (*data < 0) | ||
218 | res = vector_sp_add(f->coeffs, f->delay - ORDER, | ||
219 | f->adaptcoeffs - ORDER); | ||
220 | else | ||
221 | res = vector_sp_sub(f->coeffs, f->delay - ORDER, | ||
222 | f->adaptcoeffs - ORDER); | ||
223 | } else { | ||
224 | res = scalarproduct(f->coeffs, f->delay - ORDER); | ||
225 | } | ||
226 | res = FP_TO_INT(res); | ||
227 | #else | ||
228 | res = FP_TO_INT(scalarproduct(f->coeffs, f->delay - ORDER)); | ||
229 | |||
230 | if (LIKELY(*data != 0)) { | ||
231 | if (*data < 0) | ||
232 | vector_add(f->coeffs, f->adaptcoeffs - ORDER); | ||
233 | else | ||
234 | vector_sub(f->coeffs, f->adaptcoeffs - ORDER); | ||
235 | } | ||
236 | #endif | ||
237 | |||
238 | /* Convert res from (32-FRACBITS).FRACBITS fixed-point format to an | ||
239 | integer (rounding to nearest) and add the input value to | ||
240 | it */ | ||
241 | res += *data; | ||
242 | |||
243 | *data++ = res; | ||
244 | |||
245 | /* Update the output history */ | ||
246 | *f->delay++ = SATURATE(res); | ||
247 | |||
248 | /* Version ??? to < 3.98 files (untested) */ | ||
249 | f->adaptcoeffs[0] = (res == 0) ? 0 : ((res >> 28) & 8) - 4; | ||
250 | f->adaptcoeffs[-4] >>= 1; | ||
251 | f->adaptcoeffs[-8] >>= 1; | ||
252 | |||
253 | f->adaptcoeffs++; | ||
254 | |||
255 | /* Have we filled the history buffer? */ | ||
256 | if (UNLIKELY(f->delay == f->history_end)) { | ||
257 | memmove(f->coeffs + ORDER, f->delay - (ORDER*2), | ||
258 | (ORDER*2) * sizeof(filter_int)); | ||
259 | f->adaptcoeffs = f->coeffs + ORDER*2; | ||
260 | f->delay = f->coeffs + ORDER*3; | ||
261 | } | ||
262 | } | ||
263 | } | ||
264 | |||
265 | static struct filter_t filter[2] IBSS_ATTR_DEMAC; | ||
266 | |||
267 | static void do_init_filter(struct filter_t* f, filter_int* buf) | ||
268 | { | ||
269 | f->coeffs = buf; | ||
270 | f->history_end = buf + ORDER*3 + FILTER_HISTORY_SIZE; | ||
271 | |||
272 | /* Init pointers */ | ||
273 | f->adaptcoeffs = f->coeffs + ORDER*2; | ||
274 | f->delay = f->coeffs + ORDER*3; | ||
275 | |||
276 | /* Zero coefficients and history buffer */ | ||
277 | memset(f->coeffs, 0, ORDER*3 * sizeof(filter_int)); | ||
278 | |||
279 | /* Zero the running average */ | ||
280 | f->avg = 0; | ||
281 | } | ||
282 | |||
283 | void INIT_FILTER(filter_int* buf) | ||
284 | { | ||
285 | do_init_filter(&filter[0], buf); | ||
286 | do_init_filter(&filter[1], buf + ORDER*3 + FILTER_HISTORY_SIZE); | ||
287 | } | ||
288 | |||
289 | void ICODE_ATTR_DEMAC APPLY_FILTER(int fileversion, int channel, | ||
290 | int32_t* data, int count) | ||
291 | { | ||
292 | if (fileversion >= 3980) | ||
293 | do_apply_filter_3980(&filter[channel], data, count); | ||
294 | else | ||
295 | do_apply_filter_3970(&filter[channel], data, count); | ||
296 | } | ||
diff --git a/lib/rbcodec/codecs/demac/libdemac/filter.h b/lib/rbcodec/codecs/demac/libdemac/filter.h new file mode 100644 index 0000000000..609ea12496 --- /dev/null +++ b/lib/rbcodec/codecs/demac/libdemac/filter.h | |||
@@ -0,0 +1,50 @@ | |||
1 | /* | ||
2 | |||
3 | libdemac - A Monkey's Audio decoder | ||
4 | |||
5 | $Id$ | ||
6 | |||
7 | Copyright (C) Dave Chapman 2007 | ||
8 | |||
9 | This program is free software; you can redistribute it and/or modify | ||
10 | it under the terms of the GNU General Public License as published by | ||
11 | the Free Software Foundation; either version 2 of the License, or | ||
12 | (at your option) any later version. | ||
13 | |||
14 | This program is distributed in the hope that it will be useful, | ||
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | GNU General Public License for more details. | ||
18 | |||
19 | You should have received a copy of the GNU General Public License | ||
20 | along with this program; if not, write to the Free Software | ||
21 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | ||
22 | |||
23 | */ | ||
24 | |||
25 | #ifndef _APE_FILTER_H | ||
26 | #define _APE_FILTER_H | ||
27 | |||
28 | #include "demac_config.h" | ||
29 | |||
30 | void init_filter_16_11(filter_int* buf); | ||
31 | void apply_filter_16_11(int fileversion, int channel, | ||
32 | int32_t* decoded, int count); | ||
33 | |||
34 | void init_filter_64_11(filter_int* buf); | ||
35 | void apply_filter_64_11(int fileversion, int channel, | ||
36 | int32_t* decoded, int count); | ||
37 | |||
38 | void init_filter_32_10(filter_int* buf); | ||
39 | void apply_filter_32_10(int fileversion, int channel, | ||
40 | int32_t* decoded, int count); | ||
41 | |||
42 | void init_filter_256_13(filter_int* buf); | ||
43 | void apply_filter_256_13(int fileversion, int channel, | ||
44 | int32_t* decoded, int count); | ||
45 | |||
46 | void init_filter_1280_15(filter_int* buf); | ||
47 | void apply_filter_1280_15(int fileversion, int channel, | ||
48 | int32_t* decoded, int count); | ||
49 | |||
50 | #endif | ||
diff --git a/lib/rbcodec/codecs/demac/libdemac/filter_1280_15.c b/lib/rbcodec/codecs/demac/libdemac/filter_1280_15.c new file mode 100644 index 0000000000..f2301fb02a --- /dev/null +++ b/lib/rbcodec/codecs/demac/libdemac/filter_1280_15.c | |||
@@ -0,0 +1,32 @@ | |||
1 | /* | ||
2 | |||
3 | libdemac - A Monkey's Audio decoder | ||
4 | |||
5 | $Id$ | ||
6 | |||
7 | Copyright (C) Dave Chapman 2007 | ||
8 | |||
9 | This program is free software; you can redistribute it and/or modify | ||
10 | it under the terms of the GNU General Public License as published by | ||
11 | the Free Software Foundation; either version 2 of the License, or | ||
12 | (at your option) any later version. | ||
13 | |||
14 | This program is distributed in the hope that it will be useful, | ||
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | GNU General Public License for more details. | ||
18 | |||
19 | You should have received a copy of the GNU General Public License | ||
20 | along with this program; if not, write to the Free Software | ||
21 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | ||
22 | |||
23 | */ | ||
24 | |||
25 | #include "demac_config.h" | ||
26 | #ifndef FILTER256_IRAM | ||
27 | #undef ICODE_ATTR_DEMAC | ||
28 | #define ICODE_ATTR_DEMAC | ||
29 | #endif | ||
30 | #define ORDER 1280 | ||
31 | #define FRACBITS 15 | ||
32 | #include "filter.c" | ||
diff --git a/lib/rbcodec/codecs/demac/libdemac/filter_16_11.c b/lib/rbcodec/codecs/demac/libdemac/filter_16_11.c new file mode 100644 index 0000000000..94c56e247f --- /dev/null +++ b/lib/rbcodec/codecs/demac/libdemac/filter_16_11.c | |||
@@ -0,0 +1,27 @@ | |||
1 | /* | ||
2 | |||
3 | libdemac - A Monkey's Audio decoder | ||
4 | |||
5 | $Id$ | ||
6 | |||
7 | Copyright (C) Dave Chapman 2007 | ||
8 | |||
9 | This program is free software; you can redistribute it and/or modify | ||
10 | it under the terms of the GNU General Public License as published by | ||
11 | the Free Software Foundation; either version 2 of the License, or | ||
12 | (at your option) any later version. | ||
13 | |||
14 | This program is distributed in the hope that it will be useful, | ||
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | GNU General Public License for more details. | ||
18 | |||
19 | You should have received a copy of the GNU General Public License | ||
20 | along with this program; if not, write to the Free Software | ||
21 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | ||
22 | |||
23 | */ | ||
24 | |||
25 | #define ORDER 16 | ||
26 | #define FRACBITS 11 | ||
27 | #include "filter.c" | ||
diff --git a/lib/rbcodec/codecs/demac/libdemac/filter_256_13.c b/lib/rbcodec/codecs/demac/libdemac/filter_256_13.c new file mode 100644 index 0000000000..9e4b9fcb13 --- /dev/null +++ b/lib/rbcodec/codecs/demac/libdemac/filter_256_13.c | |||
@@ -0,0 +1,32 @@ | |||
1 | /* | ||
2 | |||
3 | libdemac - A Monkey's Audio decoder | ||
4 | |||
5 | $Id$ | ||
6 | |||
7 | Copyright (C) Dave Chapman 2007 | ||
8 | |||
9 | This program is free software; you can redistribute it and/or modify | ||
10 | it under the terms of the GNU General Public License as published by | ||
11 | the Free Software Foundation; either version 2 of the License, or | ||
12 | (at your option) any later version. | ||
13 | |||
14 | This program is distributed in the hope that it will be useful, | ||
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | GNU General Public License for more details. | ||
18 | |||
19 | You should have received a copy of the GNU General Public License | ||
20 | along with this program; if not, write to the Free Software | ||
21 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | ||
22 | |||
23 | */ | ||
24 | |||
25 | #include "demac_config.h" | ||
26 | #ifndef FILTER256_IRAM | ||
27 | #undef ICODE_ATTR_DEMAC | ||
28 | #define ICODE_ATTR_DEMAC | ||
29 | #endif | ||
30 | #define ORDER 256 | ||
31 | #define FRACBITS 13 | ||
32 | #include "filter.c" | ||
diff --git a/lib/rbcodec/codecs/demac/libdemac/filter_32_10.c b/lib/rbcodec/codecs/demac/libdemac/filter_32_10.c new file mode 100644 index 0000000000..5ec85089db --- /dev/null +++ b/lib/rbcodec/codecs/demac/libdemac/filter_32_10.c | |||
@@ -0,0 +1,27 @@ | |||
1 | /* | ||
2 | |||
3 | libdemac - A Monkey's Audio decoder | ||
4 | |||
5 | $Id$ | ||
6 | |||
7 | Copyright (C) Dave Chapman 2007 | ||
8 | |||
9 | This program is free software; you can redistribute it and/or modify | ||
10 | it under the terms of the GNU General Public License as published by | ||
11 | the Free Software Foundation; either version 2 of the License, or | ||
12 | (at your option) any later version. | ||
13 | |||
14 | This program is distributed in the hope that it will be useful, | ||
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | GNU General Public License for more details. | ||
18 | |||
19 | You should have received a copy of the GNU General Public License | ||
20 | along with this program; if not, write to the Free Software | ||
21 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | ||
22 | |||
23 | */ | ||
24 | |||
25 | #define ORDER 32 | ||
26 | #define FRACBITS 10 | ||
27 | #include "filter.c" | ||
diff --git a/lib/rbcodec/codecs/demac/libdemac/filter_64_11.c b/lib/rbcodec/codecs/demac/libdemac/filter_64_11.c new file mode 100644 index 0000000000..cd74fa5f6b --- /dev/null +++ b/lib/rbcodec/codecs/demac/libdemac/filter_64_11.c | |||
@@ -0,0 +1,27 @@ | |||
1 | /* | ||
2 | |||
3 | libdemac - A Monkey's Audio decoder | ||
4 | |||
5 | $Id$ | ||
6 | |||
7 | Copyright (C) Dave Chapman 2007 | ||
8 | |||
9 | This program is free software; you can redistribute it and/or modify | ||
10 | it under the terms of the GNU General Public License as published by | ||
11 | the Free Software Foundation; either version 2 of the License, or | ||
12 | (at your option) any later version. | ||
13 | |||
14 | This program is distributed in the hope that it will be useful, | ||
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | GNU General Public License for more details. | ||
18 | |||
19 | You should have received a copy of the GNU General Public License | ||
20 | along with this program; if not, write to the Free Software | ||
21 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | ||
22 | |||
23 | */ | ||
24 | |||
25 | #define ORDER 64 | ||
26 | #define FRACBITS 11 | ||
27 | #include "filter.c" | ||
diff --git a/lib/rbcodec/codecs/demac/libdemac/parser.c b/lib/rbcodec/codecs/demac/libdemac/parser.c new file mode 100644 index 0000000000..2af4a292b8 --- /dev/null +++ b/lib/rbcodec/codecs/demac/libdemac/parser.c | |||
@@ -0,0 +1,402 @@ | |||
1 | /* | ||
2 | |||
3 | libdemac - A Monkey's Audio decoder | ||
4 | |||
5 | $Id$ | ||
6 | |||
7 | Copyright (C) Dave Chapman 2007 | ||
8 | |||
9 | This program is free software; you can redistribute it and/or modify | ||
10 | it under the terms of the GNU General Public License as published by | ||
11 | the Free Software Foundation; either version 2 of the License, or | ||
12 | (at your option) any later version. | ||
13 | |||
14 | This program is distributed in the hope that it will be useful, | ||
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | GNU General Public License for more details. | ||
18 | |||
19 | You should have received a copy of the GNU General Public License | ||
20 | along with this program; if not, write to the Free Software | ||
21 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | ||
22 | |||
23 | */ | ||
24 | |||
25 | #include <inttypes.h> | ||
26 | #include <string.h> | ||
27 | #ifndef ROCKBOX | ||
28 | #include <stdio.h> | ||
29 | #include <stdlib.h> | ||
30 | #include "inttypes.h" | ||
31 | #include <sys/stat.h> | ||
32 | #include <fcntl.h> | ||
33 | #include <unistd.h> | ||
34 | #endif | ||
35 | |||
36 | #include "parser.h" | ||
37 | |||
38 | #ifdef APE_MAX | ||
39 | #undef APE_MAX | ||
40 | #endif | ||
41 | #define APE_MAX(a,b) ((a)>(b)?(a):(b)) | ||
42 | |||
43 | |||
/* Assemble a signed 16-bit value from two bytes stored least-significant
   byte first at buf[0..1]. */
static inline int16_t get_int16(unsigned char* buf)
{
    int low  = buf[0];
    int high = buf[1];

    return (int16_t)((high << 8) | low);
}
48 | |||
/* Assemble an unsigned 16-bit value from two bytes stored least-significant
   byte first at buf[0..1]. */
static inline uint16_t get_uint16(unsigned char* buf)
{
    int low  = buf[0];
    int high = buf[1];

    return (uint16_t)((high << 8) | low);
}
53 | |||
/* Assemble an unsigned 32-bit value from four bytes stored least-significant
   byte first at buf[0..3].

   Each byte is widened to uint32_t before shifting: an unsigned char is
   promoted to (signed) int, and shifting a value >= 0x80 left by 24 would
   move a bit into the sign position, which is undefined behaviour. */
static inline uint32_t get_uint32(unsigned char* buf)
{
    return  (uint32_t)buf[0]
         | ((uint32_t)buf[1] << 8)
         | ((uint32_t)buf[2] << 16)
         | ((uint32_t)buf[3] << 24);
}
58 | |||
59 | |||
60 | int ape_parseheaderbuf(unsigned char* buf, struct ape_ctx_t* ape_ctx) | ||
61 | { | ||
62 | unsigned char* header; | ||
63 | |||
64 | memset(ape_ctx,0,sizeof(struct ape_ctx_t)); | ||
65 | /* TODO: Skip any leading junk such as id3v2 tags */ | ||
66 | ape_ctx->junklength = 0; | ||
67 | |||
68 | memcpy(ape_ctx->magic, buf, 4); | ||
69 | if (memcmp(ape_ctx->magic,"MAC ",4)!=0) | ||
70 | { | ||
71 | return -1; | ||
72 | } | ||
73 | |||
74 | ape_ctx->fileversion = get_int16(buf + 4); | ||
75 | |||
76 | if (ape_ctx->fileversion >= 3980) | ||
77 | { | ||
78 | ape_ctx->padding1 = get_int16(buf + 6); | ||
79 | ape_ctx->descriptorlength = get_uint32(buf + 8); | ||
80 | ape_ctx->headerlength = get_uint32(buf + 12); | ||
81 | ape_ctx->seektablelength = get_uint32(buf + 16); | ||
82 | ape_ctx->wavheaderlength = get_uint32(buf + 20); | ||
83 | ape_ctx->audiodatalength = get_uint32(buf + 24); | ||
84 | ape_ctx->audiodatalength_high = get_uint32(buf + 28); | ||
85 | ape_ctx->wavtaillength = get_uint32(buf + 32); | ||
86 | memcpy(ape_ctx->md5, buf + 36, 16); | ||
87 | |||
88 | header = buf + ape_ctx->descriptorlength; | ||
89 | |||
90 | /* Read header data */ | ||
91 | ape_ctx->compressiontype = get_uint16(header + 0); | ||
92 | ape_ctx->formatflags = get_uint16(header + 2); | ||
93 | ape_ctx->blocksperframe = get_uint32(header + 4); | ||
94 | ape_ctx->finalframeblocks = get_uint32(header + 8); | ||
95 | ape_ctx->totalframes = get_uint32(header + 12); | ||
96 | ape_ctx->bps = get_uint16(header + 16); | ||
97 | ape_ctx->channels = get_uint16(header + 18); | ||
98 | ape_ctx->samplerate = get_uint32(header + 20); | ||
99 | |||
100 | ape_ctx->seektablefilepos = ape_ctx->junklength + | ||
101 | ape_ctx->descriptorlength + | ||
102 | ape_ctx->headerlength; | ||
103 | |||
104 | ape_ctx->firstframe = ape_ctx->junklength + ape_ctx->descriptorlength + | ||
105 | ape_ctx->headerlength + ape_ctx->seektablelength + | ||
106 | ape_ctx->wavheaderlength; | ||
107 | } else { | ||
108 | ape_ctx->headerlength = 32; | ||
109 | ape_ctx->compressiontype = get_uint16(buf + 6); | ||
110 | ape_ctx->formatflags = get_uint16(buf + 8); | ||
111 | ape_ctx->channels = get_uint16(buf + 10); | ||
112 | ape_ctx->samplerate = get_uint32(buf + 12); | ||
113 | ape_ctx->wavheaderlength = get_uint32(buf + 16); | ||
114 | ape_ctx->totalframes = get_uint32(buf + 24); | ||
115 | ape_ctx->finalframeblocks = get_uint32(buf + 28); | ||
116 | |||
117 | if (ape_ctx->formatflags & MAC_FORMAT_FLAG_HAS_PEAK_LEVEL) | ||
118 | { | ||
119 | ape_ctx->headerlength += 4; | ||
120 | } | ||
121 | |||
122 | if (ape_ctx->formatflags & MAC_FORMAT_FLAG_HAS_SEEK_ELEMENTS) | ||
123 | { | ||
124 | ape_ctx->seektablelength = get_uint32(buf + ape_ctx->headerlength); | ||
125 | ape_ctx->seektablelength *= sizeof(int32_t); | ||
126 | ape_ctx->headerlength += 4; | ||
127 | } else { | ||
128 | ape_ctx->seektablelength = ape_ctx->totalframes * sizeof(int32_t); | ||
129 | } | ||
130 | |||
131 | if (ape_ctx->formatflags & MAC_FORMAT_FLAG_8_BIT) | ||
132 | ape_ctx->bps = 8; | ||
133 | else if (ape_ctx->formatflags & MAC_FORMAT_FLAG_24_BIT) | ||
134 | ape_ctx->bps = 24; | ||
135 | else | ||
136 | ape_ctx->bps = 16; | ||
137 | |||
138 | if (ape_ctx->fileversion >= 3950) | ||
139 | ape_ctx->blocksperframe = 73728 * 4; | ||
140 | else if ((ape_ctx->fileversion >= 3900) || (ape_ctx->fileversion >= 3800 && ape_ctx->compressiontype >= 4000)) | ||
141 | ape_ctx->blocksperframe = 73728; | ||
142 | else | ||
143 | ape_ctx->blocksperframe = 9216; | ||
144 | |||
145 | ape_ctx->seektablefilepos = ape_ctx->junklength + ape_ctx->headerlength + | ||
146 | ape_ctx->wavheaderlength; | ||
147 | |||
148 | ape_ctx->firstframe = ape_ctx->junklength + ape_ctx->headerlength + | ||
149 | ape_ctx->wavheaderlength + ape_ctx->seektablelength; | ||
150 | } | ||
151 | |||
152 | ape_ctx->totalsamples = ape_ctx->finalframeblocks; | ||
153 | if (ape_ctx->totalframes > 1) | ||
154 | ape_ctx->totalsamples += ape_ctx->blocksperframe * (ape_ctx->totalframes-1); | ||
155 | |||
156 | ape_ctx->numseekpoints = APE_MAX(ape_ctx->maxseekpoints, | ||
157 | ape_ctx->seektablelength / sizeof(int32_t)); | ||
158 | |||
159 | return 0; | ||
160 | } | ||
161 | |||
162 | |||
163 | #ifndef ROCKBOX | ||
164 | /* Helper functions */ | ||
165 | |||
/* Read two bytes from fd and store them in *x as a little-endian unsigned
   16-bit value.  Returns 0 on success, or -1 (leaving *x untouched) when
   read() does not deliver exactly two bytes. */
static int read_uint16(int fd, uint16_t* x)
{
    unsigned char raw[2];

    if (read(fd, raw, sizeof(raw)) != 2)
        return -1;

    *x = (uint16_t)((raw[1] << 8) | raw[0]);
    return 0;
}
180 | |||
/* Signed counterpart of read_uint16(): the two bytes are stored through an
   unsigned view of *x, so the bit pattern is kept as read.  Returns whatever
   read_uint16() returns (0 on success, -1 on a short read). */
static int read_int16(int fd, int16_t* x)
{
    uint16_t* as_unsigned = (uint16_t*)x;

    return read_uint16(fd, as_unsigned);
}
185 | |||
/* Read four bytes from fd and store them in *x as a little-endian unsigned
   32-bit value.  Returns 0 on success, or -1 (leaving *x untouched) when
   read() does not deliver exactly four bytes.

   Each byte is widened to uint32_t before shifting: an unsigned char is
   promoted to (signed) int, and shifting a value >= 0x80 left by 24 would
   move a bit into the sign position, which is undefined behaviour. */
static int read_uint32(int fd, uint32_t* x)
{
    unsigned char tmp[4];
    int n;

    n = read(fd,tmp,4);

    if (n != 4)
        return -1;

    *x =  (uint32_t)tmp[0]
       | ((uint32_t)tmp[1] << 8)
       | ((uint32_t)tmp[2] << 16)
       | ((uint32_t)tmp[3] << 24);

    return 0;
}
200 | |||
201 | int ape_parseheader(int fd, struct ape_ctx_t* ape_ctx) | ||
202 | { | ||
203 | int i,n; | ||
204 | |||
205 | /* TODO: Skip any leading junk such as id3v2 tags */ | ||
206 | ape_ctx->junklength = 0; | ||
207 | |||
208 | lseek(fd,ape_ctx->junklength,SEEK_SET); | ||
209 | |||
210 | n = read(fd,&ape_ctx->magic,4); | ||
211 | if (n != 4) return -1; | ||
212 | |||
213 | if (memcmp(ape_ctx->magic,"MAC ",4)!=0) | ||
214 | { | ||
215 | return -1; | ||
216 | } | ||
217 | |||
218 | if (read_int16(fd,&ape_ctx->fileversion) < 0) | ||
219 | return -1; | ||
220 | |||
221 | if (ape_ctx->fileversion >= 3980) | ||
222 | { | ||
223 | if (read_int16(fd,&ape_ctx->padding1) < 0) | ||
224 | return -1; | ||
225 | if (read_uint32(fd,&ape_ctx->descriptorlength) < 0) | ||
226 | return -1; | ||
227 | if (read_uint32(fd,&ape_ctx->headerlength) < 0) | ||
228 | return -1; | ||
229 | if (read_uint32(fd,&ape_ctx->seektablelength) < 0) | ||
230 | return -1; | ||
231 | if (read_uint32(fd,&ape_ctx->wavheaderlength) < 0) | ||
232 | return -1; | ||
233 | if (read_uint32(fd,&ape_ctx->audiodatalength) < 0) | ||
234 | return -1; | ||
235 | if (read_uint32(fd,&ape_ctx->audiodatalength_high) < 0) | ||
236 | return -1; | ||
237 | if (read_uint32(fd,&ape_ctx->wavtaillength) < 0) | ||
238 | return -1; | ||
239 | if (read(fd,&ape_ctx->md5,16) != 16) | ||
240 | return -1; | ||
241 | |||
242 | /* Skip any unknown bytes at the end of the descriptor. This is for future | ||
243 | compatibility */ | ||
244 | if (ape_ctx->descriptorlength > 52) | ||
245 | lseek(fd,ape_ctx->descriptorlength - 52, SEEK_CUR); | ||
246 | |||
247 | /* Read header data */ | ||
248 | if (read_uint16(fd,&ape_ctx->compressiontype) < 0) | ||
249 | return -1; | ||
250 | if (read_uint16(fd,&ape_ctx->formatflags) < 0) | ||
251 | return -1; | ||
252 | if (read_uint32(fd,&ape_ctx->blocksperframe) < 0) | ||
253 | return -1; | ||
254 | if (read_uint32(fd,&ape_ctx->finalframeblocks) < 0) | ||
255 | return -1; | ||
256 | if (read_uint32(fd,&ape_ctx->totalframes) < 0) | ||
257 | return -1; | ||
258 | if (read_uint16(fd,&ape_ctx->bps) < 0) | ||
259 | return -1; | ||
260 | if (read_uint16(fd,&ape_ctx->channels) < 0) | ||
261 | return -1; | ||
262 | if (read_uint32(fd,&ape_ctx->samplerate) < 0) | ||
263 | return -1; | ||
264 | } else { | ||
265 | ape_ctx->descriptorlength = 0; | ||
266 | ape_ctx->headerlength = 32; | ||
267 | |||
268 | if (read_uint16(fd,&ape_ctx->compressiontype) < 0) | ||
269 | return -1; | ||
270 | if (read_uint16(fd,&ape_ctx->formatflags) < 0) | ||
271 | return -1; | ||
272 | if (read_uint16(fd,&ape_ctx->channels) < 0) | ||
273 | return -1; | ||
274 | if (read_uint32(fd,&ape_ctx->samplerate) < 0) | ||
275 | return -1; | ||
276 | if (read_uint32(fd,&ape_ctx->wavheaderlength) < 0) | ||
277 | return -1; | ||
278 | if (read_uint32(fd,&ape_ctx->wavtaillength) < 0) | ||
279 | return -1; | ||
280 | if (read_uint32(fd,&ape_ctx->totalframes) < 0) | ||
281 | return -1; | ||
282 | if (read_uint32(fd,&ape_ctx->finalframeblocks) < 0) | ||
283 | return -1; | ||
284 | |||
285 | if (ape_ctx->formatflags & MAC_FORMAT_FLAG_HAS_PEAK_LEVEL) | ||
286 | { | ||
287 | lseek(fd, 4, SEEK_CUR); /* Skip the peak level */ | ||
288 | ape_ctx->headerlength += 4; | ||
289 | } | ||
290 | |||
291 | if (ape_ctx->formatflags & MAC_FORMAT_FLAG_HAS_SEEK_ELEMENTS) | ||
292 | { | ||
293 | if (read_uint32(fd,&ape_ctx->seektablelength) < 0) | ||
294 | return -1; | ||
295 | ape_ctx->headerlength += 4; | ||
296 | ape_ctx->seektablelength *= sizeof(int32_t); | ||
297 | } else { | ||
298 | ape_ctx->seektablelength = ape_ctx->totalframes * sizeof(int32_t); | ||
299 | } | ||
300 | |||
301 | if (ape_ctx->formatflags & MAC_FORMAT_FLAG_8_BIT) | ||
302 | ape_ctx->bps = 8; | ||
303 | else if (ape_ctx->formatflags & MAC_FORMAT_FLAG_24_BIT) | ||
304 | ape_ctx->bps = 24; | ||
305 | else | ||
306 | ape_ctx->bps = 16; | ||
307 | |||
308 | if (ape_ctx->fileversion >= 3950) | ||
309 | ape_ctx->blocksperframe = 73728 * 4; | ||
310 | else if ((ape_ctx->fileversion >= 3900) || (ape_ctx->fileversion >= 3800 && ape_ctx->compressiontype >= 4000)) | ||
311 | ape_ctx->blocksperframe = 73728; | ||
312 | else | ||
313 | ape_ctx->blocksperframe = 9216; | ||
314 | |||
315 | /* Skip any stored wav header */ | ||
316 | if (!(ape_ctx->formatflags & MAC_FORMAT_FLAG_CREATE_WAV_HEADER)) | ||
317 | { | ||
318 | lseek(fd, ape_ctx->wavheaderlength, SEEK_CUR); | ||
319 | } | ||
320 | } | ||
321 | |||
322 | ape_ctx->totalsamples = ape_ctx->finalframeblocks; | ||
323 | if (ape_ctx->totalframes > 1) | ||
324 | ape_ctx->totalsamples += ape_ctx->blocksperframe * (ape_ctx->totalframes-1); | ||
325 | |||
326 | if (ape_ctx->seektablelength > 0) | ||
327 | { | ||
328 | ape_ctx->seektable = malloc(ape_ctx->seektablelength); | ||
329 | if (ape_ctx->seektable == NULL) | ||
330 | return -1; | ||
331 | for (i=0; i < ape_ctx->seektablelength / sizeof(uint32_t); i++) | ||
332 | { | ||
333 | if (read_uint32(fd,&ape_ctx->seektable[i]) < 0) | ||
334 | { | ||
335 | free(ape_ctx->seektable); | ||
336 | return -1; | ||
337 | } | ||
338 | } | ||
339 | } | ||
340 | |||
341 | ape_ctx->firstframe = ape_ctx->junklength + ape_ctx->descriptorlength + | ||
342 | ape_ctx->headerlength + ape_ctx->seektablelength + | ||
343 | ape_ctx->wavheaderlength; | ||
344 | |||
345 | return 0; | ||
346 | } | ||
347 | |||
348 | void ape_dumpinfo(struct ape_ctx_t* ape_ctx) | ||
349 | { | ||
350 | int i; | ||
351 | |||
352 | printf("Descriptor Block:\n\n"); | ||
353 | printf("magic = \"%c%c%c%c\"\n", | ||
354 | ape_ctx->magic[0],ape_ctx->magic[1], | ||
355 | ape_ctx->magic[2],ape_ctx->magic[3]); | ||
356 | printf("fileversion = %d\n",ape_ctx->fileversion); | ||
357 | printf("descriptorlength = %d\n",ape_ctx->descriptorlength); | ||
358 | printf("headerlength = %d\n",ape_ctx->headerlength); | ||
359 | printf("seektablelength = %d\n",ape_ctx->seektablelength); | ||
360 | printf("wavheaderlength = %d\n",ape_ctx->wavheaderlength); | ||
361 | printf("audiodatalength = %d\n",ape_ctx->audiodatalength); | ||
362 | printf("audiodatalength_high = %d\n",ape_ctx->audiodatalength_high); | ||
363 | printf("wavtaillength = %d\n",ape_ctx->wavtaillength); | ||
364 | printf("md5 = "); | ||
365 | for (i = 0; i < 16; i++) | ||
366 | printf("%02x",ape_ctx->md5[i]); | ||
367 | printf("\n"); | ||
368 | |||
369 | printf("\nHeader Block:\n\n"); | ||
370 | |||
371 | printf("compressiontype = %d\n",ape_ctx->compressiontype); | ||
372 | printf("formatflags = %d\n",ape_ctx->formatflags); | ||
373 | printf("blocksperframe = %d\n",ape_ctx->blocksperframe); | ||
374 | printf("finalframeblocks = %d\n",ape_ctx->finalframeblocks); | ||
375 | printf("totalframes = %d\n",ape_ctx->totalframes); | ||
376 | printf("bps = %d\n",ape_ctx->bps); | ||
377 | printf("channels = %d\n",ape_ctx->channels); | ||
378 | printf("samplerate = %d\n",ape_ctx->samplerate); | ||
379 | |||
380 | printf("\nSeektable\n\n"); | ||
381 | if ((ape_ctx->seektablelength / sizeof(uint32_t)) != ape_ctx->totalframes) | ||
382 | { | ||
383 | printf("No seektable\n"); | ||
384 | } | ||
385 | else | ||
386 | { | ||
387 | for ( i = 0; i < ape_ctx->seektablelength / sizeof(uint32_t) ; i++) | ||
388 | { | ||
389 | if (i < ape_ctx->totalframes-1) { | ||
390 | printf("%8d %d (%d bytes)\n",i,ape_ctx->seektable[i],ape_ctx->seektable[i+1]-ape_ctx->seektable[i]); | ||
391 | } else { | ||
392 | printf("%8d %d\n",i,ape_ctx->seektable[i]); | ||
393 | } | ||
394 | } | ||
395 | } | ||
396 | printf("\nCalculated information:\n\n"); | ||
397 | printf("junklength = %d\n",ape_ctx->junklength); | ||
398 | printf("firstframe = %d\n",ape_ctx->firstframe); | ||
399 | printf("totalsamples = %d\n",ape_ctx->totalsamples); | ||
400 | } | ||
401 | |||
402 | #endif /* !ROCKBOX */ | ||
diff --git a/lib/rbcodec/codecs/demac/libdemac/parser.h b/lib/rbcodec/codecs/demac/libdemac/parser.h new file mode 100644 index 0000000000..6f07deac12 --- /dev/null +++ b/lib/rbcodec/codecs/demac/libdemac/parser.h | |||
@@ -0,0 +1,137 @@ | |||
1 | /* | ||
2 | |||
3 | libdemac - A Monkey's Audio decoder | ||
4 | |||
5 | $Id$ | ||
6 | |||
7 | Copyright (C) Dave Chapman 2007 | ||
8 | |||
9 | This program is free software; you can redistribute it and/or modify | ||
10 | it under the terms of the GNU General Public License as published by | ||
11 | the Free Software Foundation; either version 2 of the License, or | ||
12 | (at your option) any later version. | ||
13 | |||
14 | This program is distributed in the hope that it will be useful, | ||
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | GNU General Public License for more details. | ||
18 | |||
19 | You should have received a copy of the GNU General Public License | ||
20 | along with this program; if not, write to the Free Software | ||
21 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | ||
22 | |||
23 | */ | ||
24 | |||
25 | #ifndef _APE_PARSER_H | ||
26 | #define _APE_PARSER_H | ||
27 | |||
28 | #include <inttypes.h> | ||
29 | #include "demac_config.h" | ||
30 | |||
/* The earliest and latest file formats supported by this library */
#define APE_MIN_VERSION 3970
#define APE_MAX_VERSION 3990

/* Bit masks for ape_ctx_t.formatflags.  The header parsers in parser.c test
   them with '&' to find optional header fields and the sample width when
   reading files older than version 3980. */
#define MAC_FORMAT_FLAG_8_BIT                 1    // is 8-bit [OBSOLETE]
#define MAC_FORMAT_FLAG_CRC                   2    // uses the new CRC32 error detection [OBSOLETE]
#define MAC_FORMAT_FLAG_HAS_PEAK_LEVEL        4    // uint32 nPeakLevel after the header [OBSOLETE]
#define MAC_FORMAT_FLAG_24_BIT                8    // is 24-bit [OBSOLETE]
#define MAC_FORMAT_FLAG_HAS_SEEK_ELEMENTS    16    // has the number of seek elements after the peak level
#define MAC_FORMAT_FLAG_CREATE_WAV_HEADER    32    // create the wave header on decompression (not stored)
41 | |||
42 | |||
/* Special frame codes:

   MONO_SILENCE   - All PCM samples in frame are zero (mono streams only)
   LEFT_SILENCE   - All PCM samples for left channel in frame are zero (stereo streams)
   RIGHT_SILENCE  - All PCM samples for right channel in frame are zero (stereo streams)
   STEREO_SILENCE - LEFT_SILENCE and RIGHT_SILENCE combined (1 | 2)
   PSEUDO_STEREO  - Left and Right channels are identical

*/

#define APE_FRAMECODE_MONO_SILENCE    1
#define APE_FRAMECODE_LEFT_SILENCE    1 /* same as mono */
#define APE_FRAMECODE_RIGHT_SILENCE   2
#define APE_FRAMECODE_STEREO_SILENCE  3 /* combined */
#define APE_FRAMECODE_PSEUDO_STEREO   4

#define PREDICTOR_ORDER 8
/* Total size of all predictor histories - 50 * sizeof(int32_t) */
#define PREDICTOR_SIZE 50
61 | |||
62 | |||
/* NOTE: This struct is used in predictor-arm.S - any updates need to
         be reflected there. */

/* State of the stereo predictor.  predictor-arm.S addresses the members by
   hard-coded byte offsets (buf at 0, YlastA at 4, ... historybuffer at 100),
   which only match this declaration when pointers and int32_t are both
   4 bytes wide and no padding is inserted. */
struct predictor_t
{
    /* Filter histories */
    int32_t* buf;   /* NOTE(review): presumably points into historybuffer
                       below - confirm in predictor.c */

    int32_t YlastA;
    int32_t XlastA;

    /* NOTE: The order of the next four fields is important for
       predictor-arm.S */
    /* (the assembly loads YfilterB/XfilterA and XfilterB/YfilterA as
       adjacent pairs with a single two-register load) */
    int32_t YfilterB;
    int32_t XfilterA;
    int32_t XfilterB;
    int32_t YfilterA;

    /* Adaption co-efficients */
    int32_t YcoeffsA[4];
    int32_t XcoeffsA[4];
    int32_t YcoeffsB[5];
    int32_t XcoeffsB[5];
    /* PREDICTOR_HISTORY_SIZE is not defined in this header; presumably it
       comes from demac_config.h - TODO confirm */
    int32_t historybuffer[PREDICTOR_HISTORY_SIZE + PREDICTOR_SIZE];
};
88 | |||
/* Everything known about one APE file: the values read from its header by
   ape_parseheader() / ape_parseheaderbuf(), values derived from them, the
   seek table and the running decoder state. */
struct ape_ctx_t
{
    /* Derived fields */
    uint32_t      junklength;    /* currently always set to 0 by the parsers */
    uint32_t      firstframe;    /* sum of junklength and the lengths of all
                                    header parts, the seek table and the wav
                                    header, as computed by the parsers */
    uint32_t      totalsamples;  /* finalframeblocks, plus blocksperframe for
                                    every frame except the last */

    /* Info from Descriptor Block */
    char          magic[4];      /* must be "MAC " */
    int16_t       fileversion;   /* files >= 3980 have a separate descriptor block */
    int16_t       padding1;
    uint32_t      descriptorlength;
    uint32_t      headerlength;
    uint32_t      seektablelength;  /* in bytes; entry count is this / 4 */
    uint32_t      wavheaderlength;
    uint32_t      audiodatalength;
    uint32_t      audiodatalength_high; /* NOTE(review): presumably the upper
                                           32 bits of the audio data length -
                                           confirm against the format spec */
    uint32_t      wavtaillength;
    uint8_t       md5[16];

    /* Info from Header Block */
    uint16_t      compressiontype;
    uint16_t      formatflags;      /* MAC_FORMAT_FLAG_* bits */
    uint32_t      blocksperframe;   /* for files < 3980 chosen by the parser
                                       from the file version */
    uint32_t      finalframeblocks;
    uint32_t      totalframes;
    uint16_t      bps;              /* for files < 3980 derived from
                                       formatflags: 8, 24 or 16 */
    uint16_t      channels;
    uint32_t      samplerate;

    /* Seektable */
    uint32_t*     seektable;        /* Seektable buffer */
    uint32_t      maxseekpoints;    /* Max seekpoints we can store (size of seektable buffer) */
    uint32_t      numseekpoints;    /* Number of seekpoints */
    int           seektablefilepos; /* Location in .ape file of seektable */

    /* Decoder state */
    uint32_t      CRC;
    int           frameflags;
    int           currentframeblocks;
    int           blocksdecoded;
    struct predictor_t predictor;
};
132 | |||
/* Parse the header by reading from file descriptor fd.  Returns 0 on
   success, -1 on failure.  Only defined when ROCKBOX is not defined. */
int ape_parseheader(int fd, struct ape_ctx_t* ape_ctx);
/* Parse a header that is already in memory at buf.  Zeroes *ape_ctx first.
   Returns 0 on success, -1 if the "MAC " magic is missing. */
int ape_parseheaderbuf(unsigned char* buf, struct ape_ctx_t* ape_ctx);
/* Print the parsed fields to stdout.  Only defined when ROCKBOX is not
   defined. */
void ape_dumpinfo(struct ape_ctx_t* ape_ctx);
136 | |||
137 | #endif | ||
diff --git a/lib/rbcodec/codecs/demac/libdemac/predictor-arm.S b/lib/rbcodec/codecs/demac/libdemac/predictor-arm.S new file mode 100644 index 0000000000..92a78ed9b4 --- /dev/null +++ b/lib/rbcodec/codecs/demac/libdemac/predictor-arm.S | |||
@@ -0,0 +1,702 @@ | |||
1 | /* | ||
2 | |||
3 | libdemac - A Monkey's Audio decoder | ||
4 | |||
5 | $Id$ | ||
6 | |||
7 | Copyright (C) Dave Chapman 2007 | ||
8 | |||
9 | This program is free software; you can redistribute it and/or modify | ||
10 | it under the terms of the GNU General Public License as published by | ||
11 | the Free Software Foundation; either version 2 of the License, or | ||
12 | (at your option) any later version. | ||
13 | |||
14 | This program is distributed in the hope that it will be useful, | ||
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | GNU General Public License for more details. | ||
18 | |||
19 | You should have received a copy of the GNU General Public License | ||
20 | along with this program; if not, write to the Free Software | ||
21 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | ||
22 | |||
23 | */ | ||
24 | #include "demac_config.h" | ||
25 | |||
26 | .section ICODE_SECTION_DEMAC_ARM,"ax",%progbits | ||
27 | |||
28 | .align 2 | ||
29 | |||
/* NOTE: The following need to be kept in sync with parser.h */

/* Byte offsets into p->buf.  The code below adds them directly to the
   p->buf pointer held in r14 (for example "#YDELAYA-12" addresses the word
   three entries before YDELAYA), so each value is four times the matching
   int32_t index.
   NOTE(review): the C-side index constants are not visible from this file;
   verify the values against them. */
#define YDELAYA 200
#define YDELAYB 168
#define XDELAYA 136
#define XDELAYB 104
#define YADAPTCOEFFSA 72
#define XADAPTCOEFFSA 56
#define YADAPTCOEFFSB 40
#define XADAPTCOEFFSB 20

/* struct predictor_t members: */
/* Byte offsets of the members from the start of the struct.  They assume
   4-byte pointers and 4-byte int32_t members laid out without padding. */
#define buf 0 /* int32_t* buf */

#define YlastA 4 /* int32_t YlastA; */
#define XlastA 8 /* int32_t XlastA; */

#define YfilterB 12 /* int32_t YfilterB; */
#define XfilterA 16 /* int32_t XfilterA; */

#define XfilterB 20 /* int32_t XfilterB; */
#define YfilterA 24 /* int32_t YfilterA; */

#define YcoeffsA 28 /* int32_t YcoeffsA[4]; */
#define XcoeffsA 44 /* int32_t XcoeffsA[4]; */
#define YcoeffsB 60 /* int32_t YcoeffsB[5]; */
#define XcoeffsB 80 /* int32_t XcoeffsB[5]; */

#define historybuffer 100 /* int32_t historybuffer[] */
59 | |||
@ Macro for loading 2 registers, for various ARM versions.
@ Registers must start with an even register, and must be consecutive.
@
@ Loads reg1 from [base + offset] and reg2 from [base + offset + 4].
@ The even/consecutive rule is what the ldrd form used on ARMv6+ needs.
@   ARM_ARCH >= 6 : one ldrd
@   CPU_ARM7TDMI  : address is formed in reg1, then one ldmia loads both
@   otherwise     : two separate ldr instructions
@ The offset argument is passed with its leading '#', as in "#YfilterB".

.macro LDR2OFS reg1, reg2, base, offset
#if ARM_ARCH >= 6
    ldrd \reg1, [\base, \offset]
#else /* ARM_ARCH < 6 */
#ifdef CPU_ARM7TDMI
    add \reg1, \base, \offset
    ldmia \reg1, {\reg1, \reg2}
#else /* ARM9 (v4 and v5) is faster this way */
    ldr \reg1, [\base, \offset]
    ldr \reg2, [\base, \offset+4]
#endif
#endif /* ARM_ARCH */
.endm
76 | |||
@ Macro for storing 2 registers, for various ARM versions.
@ Registers must start with an even register, and must be consecutive.
@
@ Stores reg1 to [base + offset] and reg2 to [base + offset + 4].
@ The even/consecutive rule is what the strd form used on ARMv6+ needs.
@   ARM_ARCH >= 6 : one strd
@   otherwise     : two separate str instructions
@ The offset argument is passed with its leading '#', as in "#YDELAYA-4".

.macro STR2OFS reg1, reg2, base, offset
#if ARM_ARCH >= 6
    strd \reg1, [\base, \offset]
#else
    str \reg1, [\base, \offset]
    str \reg2, [\base, \offset+4]
#endif
.endm
88 | |||
89 | .global predictor_decode_stereo | ||
90 | .type predictor_decode_stereo,%function | ||
91 | |||
92 | @ Register usage: | ||
93 | @ | ||
94 | @ r0-r11 - scratch | ||
95 | @ r12 - struct predictor_t* p | ||
96 | @ r14 - int32_t* p->buf | ||
97 | |||
98 | @ void predictor_decode_stereo(struct predictor_t* p, | ||
99 | @ int32_t* decoded0, | ||
100 | @ int32_t* decoded1, | ||
101 | @ int count) | ||
102 | |||
103 | predictor_decode_stereo: | ||
104 | stmdb sp!, {r1-r11, lr} | ||
105 | |||
106 | @ r1 (decoded0) is [sp] | ||
107 | @ r2 (decoded1) is [sp, #4] | ||
108 | @ r3 (count) is [sp, #8] | ||
109 | |||
110 | mov r12, r0 @ r12 := p | ||
111 | ldr r14, [r0] @ r14 := p->buf | ||
112 | |||
113 | loop: | ||
114 | |||
115 | @@@@@@@@@@@@@@@@@@@@@@@@@@@ PREDICTOR Y | ||
116 | |||
117 | @ Predictor Y, Filter A | ||
118 | |||
119 | ldr r11, [r12, #YlastA] @ r11 := p->YlastA | ||
120 | |||
121 | add r2, r14, #YDELAYA-12 @ r2 := &p->buf[YDELAYA-3] | ||
122 | ldmia r2, {r2, r3, r10} @ r2 := p->buf[YDELAYA-3] | ||
123 | @ r3 := p->buf[YDELAYA-2] | ||
124 | @ r10 := p->buf[YDELAYA-1] | ||
125 | |||
126 | add r6, r12, #YcoeffsA | ||
127 | ldmia r6, {r6 - r9} @ r6 := p->YcoeffsA[0] | ||
128 | @ r7 := p->YcoeffsA[1] | ||
129 | @ r8 := p->YcoeffsA[2] | ||
130 | @ r9 := p->YcoeffsA[3] | ||
131 | |||
132 | subs r10, r11, r10 @ r10 := r11 - r10 | ||
133 | |||
134 | STR2OFS r10, r11, r14, #YDELAYA-4 | ||
135 | @ p->buf[YDELAYA-1] = r10 | ||
136 | @ p->buf[YDELAYA] = r11 | ||
137 | |||
138 | mul r0, r11, r6 @ r0 := p->buf[YDELAYA] * p->YcoeffsA[0] | ||
139 | mla r0, r10, r7, r0 @ r0 += p->buf[YDELAYA-1] * p->YcoeffsA[1] | ||
140 | mla r0, r3, r8, r0 @ r0 += p->buf[YDELAYA-2] * p->YcoeffsA[2] | ||
141 | mla r0, r2, r9, r0 @ r0 += p->buf[YDELAYA-3] * p->YcoeffsA[3] | ||
142 | |||
143 | @ flags were set above, in the subs instruction | ||
144 | mvngt r10, #0 | ||
145 | movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro) | ||
146 | |||
147 | cmp r11, #0 | ||
148 | mvngt r11, #0 | ||
149 | movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro) | ||
150 | |||
151 | STR2OFS r10, r11, r14, #YADAPTCOEFFSA-4 | ||
152 | @ p->buf[YADAPTCOEFFSA-1] := r10 | ||
153 | @ p->buf[YADAPTCOEFFSA] := r11 | ||
154 | |||
155 | @ NOTE: r0 now contains predictionA - don't overwrite. | ||
156 | |||
157 | @ Predictor Y, Filter B | ||
158 | |||
159 | LDR2OFS r6, r7, r12, #YfilterB @ r6 := p->YfilterB | ||
160 | @ r7 := p->XfilterA | ||
161 | |||
162 | add r2, r14, #YDELAYB-16 @ r2 := &p->buf[YDELAYB-4] | ||
163 | ldmia r2, {r2 - r4, r10} @ r2 := p->buf[YDELAYB-4] | ||
164 | @ r3 := p->buf[YDELAYB-3] | ||
165 | @ r4 := p->buf[YDELAYB-2] | ||
166 | @ r10 := p->buf[YDELAYB-1] | ||
167 | |||
168 | rsb r6, r6, r6, lsl #5 @ r6 := r6 * 32 - r6 ( == r6*31) | ||
169 | sub r11, r7, r6, asr #5 @ r11 (p->buf[YDELAYB]) := r7 - (r6 >> 5) | ||
170 | |||
171 | str r7, [r12, #YfilterB] @ p->YfilterB := r7 (p->XfilterA) | ||
172 | |||
173 | add r5, r12, #YcoeffsB | ||
174 | ldmia r5, {r5 - r9} @ r5 := p->YcoeffsB[0] | ||
175 | @ r6 := p->YcoeffsB[1] | ||
176 | @ r7 := p->YcoeffsB[2] | ||
177 | @ r8 := p->YcoeffsB[3] | ||
178 | @ r9 := p->YcoeffsB[4] | ||
179 | |||
180 | subs r10, r11, r10 @ r10 := r11 - r10 | ||
181 | |||
182 | STR2OFS r10, r11, r14, #YDELAYB-4 | ||
183 | @ p->buf[YDELAYB-1] = r10 | ||
184 | @ p->buf[YDELAYB] = r11 | ||
185 | |||
186 | mul r1, r11, r5 @ r1 := p->buf[YDELAYB] * p->YcoeffsB[0] | ||
187 | mla r1, r10, r6, r1 @ r1 += p->buf[YDELAYB-1] * p->YcoeffsB[1] | ||
188 | mla r1, r4, r7, r1 @ r1 += p->buf[YDELAYB-2] * p->YcoeffsB[2] | ||
189 | mla r1, r3, r8, r1 @ r1 += p->buf[YDELAYB-3] * p->YcoeffsB[3] | ||
190 | mla r1, r2, r9, r1 @ r1 += p->buf[YDELAYB-4] * p->YcoeffsB[4] | ||
191 | |||
192 | @ flags were set above, in the subs instruction | ||
193 | mvngt r10, #0 | ||
194 | movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro) | ||
195 | |||
196 | cmp r11, #0 | ||
197 | mvngt r11, #0 | ||
198 | movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro) | ||
199 | |||
200 | STR2OFS r10, r11, r14, #YADAPTCOEFFSB-4 | ||
201 | @ p->buf[YADAPTCOEFFSB-1] := r10 | ||
202 | @ p->buf[YADAPTCOEFFSB] := r11 | ||
203 | |||
204 | @ r0 still contains predictionA | ||
205 | @ r1 contains predictionB | ||
206 | |||
207 | @ Finish Predictor Y | ||
208 | |||
209 | ldr r2, [sp] @ r2 := decoded0 | ||
210 | add r0, r0, r1, asr #1 @ r0 := r0 + (r1 >> 1) | ||
211 | ldr r4, [r12, #YfilterA] @ r4 := p->YfilterA | ||
212 | ldr r3, [r2] @ r3 := *decoded0 | ||
213 | rsb r4, r4, r4, lsl #5 @ r4 := r4 * 32 - r4 ( == r4*31) | ||
214 | add r1, r3, r0, asr #10 @ r1 := r3 + (r0 >> 10) | ||
215 | str r1, [r12, #YlastA] @ p->YlastA := r1 | ||
216 | add r1, r1, r4, asr #5 @ r1 := r1 + (r4 >> 5) | ||
217 | str r1, [r12, #YfilterA] @ p->YfilterA := r1 | ||
218 | |||
219 | @ r1 contains p->YfilterA | ||
220 | @ r2 contains decoded0 | ||
221 | @ r3 contains *decoded0 | ||
222 | |||
223 | @ r5, r6, r7, r8, r9 contain p->YcoeffsB[0..4] | ||
224 | @ r10, r11 contain p->buf[YADAPTCOEFFSB-1] and p->buf[YADAPTCOEFFSB] | ||
225 | |||
226 | str r1, [r2], #4 @ *(decoded0++) := r1 (p->YfilterA) | ||
227 | str r2, [sp] @ save decoded0 | ||
228 | cmp r3, #0 | ||
229 | beq 3f | ||
230 | |||
231 | add r2, r14, #YADAPTCOEFFSB-16 | ||
232 | ldmia r2, {r2 - r4} @ r2 := p->buf[YADAPTCOEFFSB-4] | ||
233 | @ r3 := p->buf[YADAPTCOEFFSB-3] | ||
234 | @ r4 := p->buf[YADAPTCOEFFSB-2] | ||
235 | blt 1f | ||
236 | |||
237 | @ *decoded0 > 0 | ||
238 | |||
239 | sub r5, r5, r11 @ r5 := p->YcoeffsB[0] - p->buf[YADAPTCOEFFSB] | ||
240 | sub r6, r6, r10 @ r6 := p->YcoeffsB[1] - p->buf[YADAPTCOEFFSB-1] | ||
241 | sub r9, r9, r2 @ r9 := p->YcoeffsB[4] - p->buf[YADAPTCOEFFSB-4] | ||
242 | sub r8, r8, r3 @ r8 := p->YcoeffsB[3] - p->buf[YADAPTCOEFFSB-3] | ||
243 | sub r7, r7, r4 @ r7 := p->YcoeffsB[2] - p->buf[YADAPTCOEFFSB-2] | ||
244 | |||
245 | add r0, r12, #YcoeffsB | ||
246 | stmia r0, {r5 - r9} @ Save p->YcoeffsB[] | ||
247 | |||
248 | add r1, r12, #YcoeffsA | ||
249 | ldmia r1, {r2 - r5} @ r2 := p->YcoeffsA[0] | ||
250 | @ r3 := p->YcoeffsA[1] | ||
251 | @ r4 := p->YcoeffsA[2] | ||
252 | @ r5 := p->YcoeffsA[3] | ||
253 | |||
254 | add r6, r14, #YADAPTCOEFFSA-12 | ||
255 | ldmia r6, {r6 - r9} @ r6 := p->buf[YADAPTCOEFFSA-3] | ||
256 | @ r7 := p->buf[YADAPTCOEFFSA-2] | ||
257 | @ r8 := p->buf[YADAPTCOEFFSA-1] | ||
258 | @ r9 := p->buf[YADAPTCOEFFSA] | ||
259 | |||
260 | sub r5, r5, r6 @ r5 := p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3] | ||
261 | sub r4, r4, r7 @ r4 := p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2] | ||
262 | sub r3, r3, r8 @ r3 := p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1] | ||
263 | sub r2, r2, r9 @ r2 := p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA] | ||
264 | |||
265 | b 2f | ||
266 | |||
267 | |||
268 | 1: @ *decoded0 < 0 | ||
269 | |||
270 | add r5, r5, r11 @ r5 := p->YcoeffsB[0] + p->buf[YADAPTCOEFFSB] | ||
271 | add r6, r6, r10 @ r6 := p->YcoeffsB[1] + p->buf[YADAPTCOEFFSB-1] | ||
272 | add r9, r9, r2 @ r9 := p->YcoeffsB[4] + p->buf[YADAPTCOEFFSB-4] | ||
273 | add r8, r8, r3 @ r9 := p->YcoeffsB[3] + p->buf[YADAPTCOEFFSB-3] | ||
274 | add r7, r7, r4 @ r8 := p->YcoeffsB[2] + p->buf[YADAPTCOEFFSB-2] | ||
275 | |||
276 | add r0, r12, #YcoeffsB | ||
277 | stmia r0, {r5 - r9} @ Save p->YcoeffsB[] | ||
278 | |||
279 | add r1, r12, #YcoeffsA | ||
280 | ldmia r1, {r2 - r5} @ r2 := p->YcoeffsA[0] | ||
281 | @ r3 := p->YcoeffsA[1] | ||
282 | @ r4 := p->YcoeffsA[2] | ||
283 | @ r5 := p->YcoeffsA[3] | ||
284 | |||
285 | add r6, r14, #YADAPTCOEFFSA-12 | ||
286 | ldmia r6, {r6 - r9} @ r6 := p->buf[YADAPTCOEFFSA-3] | ||
287 | @ r7 := p->buf[YADAPTCOEFFSA-2] | ||
288 | @ r8 := p->buf[YADAPTCOEFFSA-1] | ||
289 | @ r9 := p->buf[YADAPTCOEFFSA] | ||
290 | |||
291 | add r5, r5, r6 @ r5 := p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3] | ||
292 | add r4, r4, r7 @ r4 := p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2] | ||
293 | add r3, r3, r8 @ r3 := p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1] | ||
294 | add r2, r2, r9 @ r2 := p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA] | ||
295 | |||
296 | 2: | ||
297 | stmia r1, {r2 - r5} @ Save p->YcoeffsA | ||
298 | |||
299 | 3: | ||
300 | |||
301 | @@@@@@@@@@@@@@@@@@@@@@@@@@@ PREDICTOR X | ||
302 | |||
303 | @ Predictor X, Filter A | ||
304 | |||
305 | ldr r11, [r12, #XlastA] @ r11 := p->XlastA | ||
306 | |||
307 | add r2, r14, #XDELAYA-12 @ r2 := &p->buf[XDELAYA-3] | ||
308 | ldmia r2, {r2, r3, r10} @ r2 := p->buf[XDELAYA-3] | ||
309 | @ r3 := p->buf[XDELAYA-2] | ||
310 | @ r10 := p->buf[XDELAYA-1] | ||
311 | |||
312 | add r6, r12, #XcoeffsA | ||
313 | ldmia r6, {r6 - r9} @ r6 := p->XcoeffsA[0] | ||
314 | @ r7 := p->XcoeffsA[1] | ||
315 | @ r8 := p->XcoeffsA[2] | ||
316 | @ r9 := p->XcoeffsA[3] | ||
317 | |||
318 | subs r10, r11, r10 @ r10 := r11 - r10 | ||
319 | |||
320 | STR2OFS r10, r11, r14, #XDELAYA-4 | ||
321 | @ p->buf[XDELAYA-1] = r10 | ||
322 | @ p->buf[XDELAYA] = r11 | ||
323 | |||
324 | mul r0, r11, r6 @ r0 := p->buf[XDELAYA] * p->XcoeffsA[0] | ||
325 | mla r0, r10, r7, r0 @ r0 += p->buf[XDELAYA-1] * p->XcoeffsA[1] | ||
326 | mla r0, r3, r8, r0 @ r0 += p->buf[XDELAYA-2] * p->XcoeffsA[2] | ||
327 | mla r0, r2, r9, r0 @ r0 += p->buf[XDELAYA-3] * p->XcoeffsA[3] | ||
328 | |||
329 | @ flags were set above, in the subs instruction | ||
330 | mvngt r10, #0 | ||
331 | movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro) | ||
332 | |||
333 | cmp r11, #0 | ||
334 | mvngt r11, #0 | ||
335 | movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro) | ||
336 | |||
337 | STR2OFS r10, r11, r14, #XADAPTCOEFFSA-4 | ||
338 | @ p->buf[XADAPTCOEFFSA-1] := r10 | ||
339 | @ p->buf[XADAPTCOEFFSA] := r11 | ||
340 | |||
341 | @ NOTE: r0 now contains predictionA - don't overwrite. | ||
342 | |||
343 | @ Predictor X, Filter B | ||
344 | |||
345 | LDR2OFS r6, r7, r12, #XfilterB @ r6 := p->XfilterB | ||
346 | @ r7 := p->YfilterA | ||
347 | |||
348 | add r2, r14, #XDELAYB-16 @ r2 := &p->buf[XDELAYB-4] | ||
349 | ldmia r2, {r2 - r4, r10} @ r2 := p->buf[XDELAYB-4] | ||
350 | @ r3 := p->buf[XDELAYB-3] | ||
351 | @ r4 := p->buf[XDELAYB-2] | ||
352 | @ r10 := p->buf[XDELAYB-1] | ||
353 | |||
354 | rsb r6, r6, r6, lsl #5 @ r6 := r6 * 32 - r6 ( == r6*31) | ||
355 | sub r11, r7, r6, asr #5 @ r11 (p->buf[XDELAYB]) := r7 - (r6 >> 5) | ||
356 | |||
357 | str r7, [r12, #XfilterB] @ p->XfilterB := r7 (p->YfilterA) | ||
358 | |||
359 | add r5, r12, #XcoeffsB | ||
360 | ldmia r5, {r5 - r9} @ r5 := p->XcoeffsB[0] | ||
361 | @ r6 := p->XcoeffsB[1] | ||
362 | @ r7 := p->XcoeffsB[2] | ||
363 | @ r8 := p->XcoeffsB[3] | ||
364 | @ r9 := p->XcoeffsB[4] | ||
365 | |||
366 | subs r10, r11, r10 @ r10 := r11 - r10 | ||
367 | |||
368 | STR2OFS r10, r11, r14, #XDELAYB-4 | ||
369 | @ p->buf[XDELAYB-1] = r10 | ||
370 | @ p->buf[XDELAYB] = r11 | ||
371 | |||
372 | mul r1, r11, r5 @ r1 := p->buf[XDELAYB] * p->XcoeffsB[0] | ||
373 | mla r1, r10, r6, r1 @ r1 += p->buf[XDELAYB-1] * p->XcoeffsB[1] | ||
374 | mla r1, r4, r7, r1 @ r1 += p->buf[XDELAYB-2] * p->XcoeffsB[2] | ||
375 | mla r1, r3, r8, r1 @ r1 += p->buf[XDELAYB-3] * p->XcoeffsB[3] | ||
376 | mla r1, r2, r9, r1 @ r1 += p->buf[XDELAYB-4] * p->XcoeffsB[4] | ||
377 | |||
378 | @ flags were set above, in the subs instruction | ||
379 | mvngt r10, #0 | ||
380 | movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro) | ||
381 | |||
382 | cmp r11, #0 | ||
383 | mvngt r11, #0 | ||
384 | movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro) | ||
385 | |||
386 | STR2OFS r10, r11, r14, #XADAPTCOEFFSB-4 | ||
387 | @ p->buf[XADAPTCOEFFSB-1] := r10 | ||
388 | @ p->buf[XADAPTCOEFFSB] := r11 | ||
389 | |||
390 | @ r0 still contains predictionA | ||
391 | @ r1 contains predictionB | ||
392 | |||
393 | @ Finish Predictor X | ||
394 | |||
395 | ldr r2, [sp, #4] @ r2 := decoded1 | ||
396 | add r0, r0, r1, asr #1 @ r0 := r0 + (r1 >> 1) | ||
397 | ldr r4, [r12, #XfilterA] @ r4 := p->XfilterA | ||
398 | ldr r3, [r2] @ r3 := *decoded1 | ||
399 | rsb r4, r4, r4, lsl #5 @ r4 := r4 * 32 - r4 ( == r4*31) | ||
400 | add r1, r3, r0, asr #10 @ r1 := r3 + (r0 >> 10) | ||
401 | str r1, [r12, #XlastA] @ p->XlastA := r1 | ||
402 | add r1, r1, r4, asr #5 @ r1 := r1 + (r4 >> 5) | ||
403 | str r1, [r12, #XfilterA] @ p->XfilterA := r1 | ||
404 | |||
405 | @ r1 contains p->XfilterA | ||
406 | @ r2 contains decoded1 | ||
407 | @ r3 contains *decoded1 | ||
408 | |||
409 | @ r5, r6, r7, r8, r9 contain p->XcoeffsB[0..4] | ||
410 | @ r10, r11 contain p->buf[XADAPTCOEFFSB-1] and p->buf[XADAPTCOEFFSB] | ||
411 | |||
412 | str r1, [r2], #4 @ *(decoded1++) := r1 (p->XfilterA) | ||
413 | str r2, [sp, #4] @ save decoded1 | ||
414 | cmp r3, #0 | ||
415 | beq 3f | ||
416 | |||
417 | add r2, r14, #XADAPTCOEFFSB-16 | ||
418 | ldmia r2, {r2 - r4} @ r2 := p->buf[XADAPTCOEFFSB-4] | ||
419 | @ r3 := p->buf[XADAPTCOEFFSB-3] | ||
420 | @ r4 := p->buf[XADAPTCOEFFSB-2] | ||
421 | blt 1f | ||
422 | |||
423 | @ *decoded1 > 0 | ||
424 | |||
425 | sub r5, r5, r11 @ r5 := p->XcoeffsB[0] - p->buf[XADAPTCOEFFSB] | ||
426 | sub r6, r6, r10 @ r6 := p->XcoeffsB[1] - p->buf[XADAPTCOEFFSB-1] | ||
427 | sub r9, r9, r2 @ r9 := p->XcoeffsB[4] - p->buf[XADAPTCOEFFSB-4] | ||
428 | sub r8, r8, r3 @ r8 := p->XcoeffsB[3] - p->buf[XADAPTCOEFFSB-3] | ||
429 | sub r7, r7, r4 @ r7 := p->XcoeffsB[2] - p->buf[XADAPTCOEFFSB-2] | ||
430 | |||
431 | add r0, r12, #XcoeffsB | ||
432 | stmia r0, {r5 - r9} @ Save p->XcoeffsB[] | ||
433 | |||
434 | add r1, r12, #XcoeffsA | ||
435 | ldmia r1, {r2 - r5} @ r2 := p->XcoeffsA[0] | ||
436 | @ r3 := p->XcoeffsA[1] | ||
437 | @ r4 := p->XcoeffsA[2] | ||
438 | @ r5 := p->XcoeffsA[3] | ||
439 | |||
440 | add r6, r14, #XADAPTCOEFFSA-12 | ||
441 | ldmia r6, {r6 - r9} @ r6 := p->buf[XADAPTCOEFFSA-3] | ||
442 | @ r7 := p->buf[XADAPTCOEFFSA-2] | ||
443 | @ r8 := p->buf[XADAPTCOEFFSA-1] | ||
444 | @ r9 := p->buf[XADAPTCOEFFSA] | ||
445 | |||
446 | sub r5, r5, r6 @ r5 := p->XcoeffsA[3] - p->buf[XADAPTCOEFFSA-3] | ||
447 | sub r4, r4, r7 @ r4 := p->XcoeffsA[2] - p->buf[XADAPTCOEFFSA-2] | ||
448 | sub r3, r3, r8 @ r3 := p->XcoeffsA[1] - p->buf[XADAPTCOEFFSA-1] | ||
449 | sub r2, r2, r9 @ r2 := p->XcoeffsA[0] - p->buf[XADAPTCOEFFSA] | ||
450 | |||
451 | b 2f | ||
452 | |||
453 | |||
454 | 1: @ *decoded1 < 0 | ||
455 | |||
456 | add r5, r5, r11 @ r5 := p->XcoeffsB[0] + p->buf[XADAPTCOEFFSB] | ||
457 | add r6, r6, r10 @ r6 := p->XcoeffsB[1] + p->buf[XADAPTCOEFFSB-1] | ||
458 | add r9, r9, r2 @ r9 := p->XcoeffsB[4] + p->buf[XADAPTCOEFFSB-4] | ||
459 | add r8, r8, r3 @ r8 := p->XcoeffsB[3] + p->buf[XADAPTCOEFFSB-3] | ||
460 | add r7, r7, r4 @ r7 := p->XcoeffsB[2] + p->buf[XADAPTCOEFFSB-2] | ||
461 | |||
462 | add r0, r12, #XcoeffsB | ||
463 | stmia r0, {r5 - r9} @ Save p->XcoeffsB[] | ||
464 | |||
465 | add r1, r12, #XcoeffsA | ||
466 | ldmia r1, {r2 - r5} @ r2 := p->XcoeffsA[0] | ||
467 | @ r3 := p->XcoeffsA[1] | ||
468 | @ r4 := p->XcoeffsA[2] | ||
469 | @ r5 := p->XcoeffsA[3] | ||
470 | |||
471 | add r6, r14, #XADAPTCOEFFSA-12 | ||
472 | ldmia r6, {r6 - r9} @ r6 := p->buf[XADAPTCOEFFSA-3] | ||
473 | @ r7 := p->buf[XADAPTCOEFFSA-2] | ||
474 | @ r8 := p->buf[XADAPTCOEFFSA-1] | ||
475 | @ r9 := p->buf[XADAPTCOEFFSA] | ||
476 | |||
477 | add r5, r5, r6 @ r5 := p->XcoeffsA[3] + p->buf[XADAPTCOEFFSA-3] | ||
478 | add r4, r4, r7 @ r4 := p->XcoeffsA[2] + p->buf[XADAPTCOEFFSA-2] | ||
479 | add r3, r3, r8 @ r3 := p->XcoeffsA[1] + p->buf[XADAPTCOEFFSA-1] | ||
480 | add r2, r2, r9 @ r2 := p->XcoeffsA[0] + p->buf[XADAPTCOEFFSA] | ||
481 | |||
482 | 2: | ||
483 | stmia r1, {r2 - r5} @ Save p->XcoeffsA | ||
484 | |||
485 | 3: | ||
486 | |||
487 | @@@@@@@@@@@@@@@@@@@@@@@@@@@ COMMON | ||
488 | |||
489 | add r14, r14, #4 @ p->buf++ | ||
490 | |||
491 | add r11, r12, #historybuffer @ r11 := &p->historybuffer[0] | ||
492 | |||
493 | sub r10, r14, #PREDICTOR_HISTORY_SIZE*4 | ||
494 | @ r10 := p->buf - PREDICTOR_HISTORY_SIZE | ||
495 | |||
496 | ldr r0, [sp, #8] | ||
497 | cmp r10, r11 | ||
498 | beq move_hist @ The history buffer is full, we need to do a memmove | ||
499 | |||
500 | @ Check loop count | ||
501 | subs r0, r0, #1 | ||
502 | strne r0, [sp, #8] | ||
503 | bne loop | ||
504 | |||
505 | done: | ||
506 | str r14, [r12] @ Save value of p->buf | ||
507 | add sp, sp, #12 @ Don't bother restoring r1-r3 | ||
508 | #ifdef ROCKBOX | ||
509 | ldmpc regs=r4-r11 | ||
510 | #else | ||
511 | ldmia sp!, {r4 - r11, pc} | ||
512 | #endif | ||
513 | |||
514 | move_hist: | ||
515 | @ dest = r11 (p->historybuffer) | ||
516 | @ src = r14 (p->buf) | ||
517 | @ n = 200 | ||
518 | |||
519 | ldmia r14!, {r0-r9} @ 40 bytes | ||
520 | stmia r11!, {r0-r9} | ||
521 | ldmia r14!, {r0-r9} @ 40 bytes | ||
522 | stmia r11!, {r0-r9} | ||
523 | ldmia r14!, {r0-r9} @ 40 bytes | ||
524 | stmia r11!, {r0-r9} | ||
525 | ldmia r14!, {r0-r9} @ 40 bytes | ||
526 | stmia r11!, {r0-r9} | ||
527 | ldmia r14!, {r0-r9} @ 40 bytes | ||
528 | stmia r11!, {r0-r9} | ||
529 | |||
530 | ldr r0, [sp, #8] | ||
531 | add r14, r12, #historybuffer @ p->buf = &p->historybuffer[0] | ||
532 | |||
533 | @ Check loop count | ||
534 | subs r0, r0, #1 | ||
535 | strne r0, [sp, #8] | ||
536 | bne loop | ||
537 | |||
538 | b done | ||
539 | .size predictor_decode_stereo, .-predictor_decode_stereo | ||
540 | |||
541 | .global predictor_decode_mono | ||
542 | .type predictor_decode_mono,%function | ||
543 | @ Decodes `count` mono samples in place at decoded0, using only the Y / filter A state of *p. | ||
544 | @ Register usage: | ||
545 | @ | ||
546 | @ r0-r11 - scratch | ||
547 | @ r12 - struct predictor_t* p | ||
548 | @ r14 - int32_t* p->buf | ||
549 | |||
550 | @ void predictor_decode_mono(struct predictor_t* p, | ||
551 | @ int32_t* decoded0, | ||
552 | @ int count) | ||
553 | @ NOTE(review): count is only tested after an iteration has run, so count == 0 is presumably never passed in - confirm with callers. | ||
554 | predictor_decode_mono: | ||
555 | stmdb sp!, {r1, r2, r4-r11, lr} | ||
556 | |||
557 | @ r1 (decoded0) is [sp] | ||
558 | @ r2 (count) is [sp, #4] | ||
559 | |||
560 | mov r12, r0 @ r12 := p | ||
561 | ldr r14, [r0] @ r14 := p->buf | ||
562 | |||
563 | loopm: | ||
564 | |||
565 | @@@@@@@@@@@@@@@@@@@@@@@@@@@ PREDICTOR | ||
566 | |||
567 | ldr r11, [r12, #YlastA] @ r11 := p->YlastA | ||
568 | |||
569 | add r2, r14, #YDELAYA-12 @ r2 := &p->buf[YDELAYA-3] | ||
570 | ldmia r2, {r2, r3, r10} @ r2 := p->buf[YDELAYA-3] | ||
571 | @ r3 := p->buf[YDELAYA-2] | ||
572 | @ r10 := p->buf[YDELAYA-1] | ||
573 | |||
574 | add r5, r12, #YcoeffsA @ r5 := &p->YcoeffsA[0] | ||
575 | ldmia r5, {r6 - r9} @ r6 := p->YcoeffsA[0] | ||
576 | @ r7 := p->YcoeffsA[1] | ||
577 | @ r8 := p->YcoeffsA[2] | ||
578 | @ r9 := p->YcoeffsA[3] | ||
579 | |||
580 | subs r10, r11, r10 @ r10 := r11 - r10 | ||
581 | |||
582 | STR2OFS r10, r11, r14, #YDELAYA-4 | ||
583 | @ p->buf[YDELAYA-1] = r10 | ||
584 | @ p->buf[YDELAYA] = r11 | ||
585 | |||
586 | mul r0, r11, r6 @ r0 := p->buf[YDELAYA] * p->YcoeffsA[0] | ||
587 | mla r0, r10, r7, r0 @ r0 += p->buf[YDELAYA-1] * p->YcoeffsA[1] | ||
588 | mla r0, r3, r8, r0 @ r0 += p->buf[YDELAYA-2] * p->YcoeffsA[2] | ||
589 | mla r0, r2, r9, r0 @ r0 += p->buf[YDELAYA-3] * p->YcoeffsA[3] | ||
590 | @ SIGN as coded below: gt -> -1 (mvn #0), lt -> +1, eq leaves the register at 0 | ||
591 | @ flags were set above, in the subs instruction | ||
592 | mvngt r10, #0 | ||
593 | movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro) | ||
594 | |||
595 | cmp r11, #0 | ||
596 | mvngt r11, #0 | ||
597 | movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro) | ||
598 | |||
599 | STR2OFS r10, r11, r14, #YADAPTCOEFFSA-4 | ||
600 | @ p->buf[YADAPTCOEFFSA-1] := r10 | ||
601 | @ p->buf[YADAPTCOEFFSA] := r11 | ||
602 | @ Finish predictor: YlastA := *decoded0 + (prediction >> 10); output := YlastA + ((31 * old YfilterA) >> 5) | ||
603 | ldr r2, [sp] @ r2 := decoded0 | ||
604 | ldr r4, [r12, #YfilterA] @ r4 := p->YfilterA | ||
605 | ldr r3, [r2] @ r3 := *decoded0 | ||
606 | rsb r4, r4, r4, lsl #5 @ r4 := r4 * 32 - r4 ( == r4*31) | ||
607 | add r1, r3, r0, asr #10 @ r1 := r3 + (r0 >> 10) | ||
608 | str r1, [r12, #YlastA] @ p->YlastA := r1 | ||
609 | add r1, r1, r4, asr #5 @ r1 := r1 + (r4 >> 5) | ||
610 | str r1, [r12, #YfilterA] @ p->YfilterA := r1 | ||
611 | |||
612 | @ r1 contains p->YfilterA | ||
613 | @ r2 contains decoded0 | ||
614 | @ r3 contains *decoded0 | ||
615 | |||
616 | @ r6, r7, r8, r9 contain p->YcoeffsA[0..3] | ||
617 | @ r10, r11 contain p->buf[YADAPTCOEFFSA-1] and p->buf[YADAPTCOEFFSA] | ||
618 | |||
619 | str r1, [r2], #4 @ *(decoded0++) := r1 (p->YfilterA) | ||
620 | str r2, [sp] @ save decoded0 | ||
621 | cmp r3, #0 | ||
622 | beq 3f | ||
623 | @ *decoded0 != 0: adapt p->YcoeffsA[]; the blt below relies on LDR2OFS leaving the flags from the cmp intact | ||
624 | LDR2OFS r2, r3, r14, #YADAPTCOEFFSA-12 | ||
625 | @ r2 := p->buf[YADAPTCOEFFSA-3] | ||
626 | @ r3 := p->buf[YADAPTCOEFFSA-2] | ||
627 | blt 1f | ||
628 | |||
629 | @ *decoded0 > 0 | ||
630 | |||
631 | sub r6, r6, r11 @ r6 := p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA] | ||
632 | sub r7, r7, r10 @ r7 := p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1] | ||
633 | sub r9, r9, r2 @ r9 := p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3] | ||
634 | sub r8, r8, r3 @ r8 := p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2] | ||
635 | |||
636 | b 2f | ||
637 | |||
638 | 1: @ *decoded0 < 0 | ||
639 | |||
640 | add r6, r6, r11 @ r6 := p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA] | ||
641 | add r7, r7, r10 @ r7 := p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1] | ||
642 | add r9, r9, r2 @ r9 := p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3] | ||
643 | add r8, r8, r3 @ r8 := p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2] | ||
644 | |||
645 | 2: | ||
646 | stmia r5, {r6 - r9} @ Save p->YcoeffsA | ||
647 | |||
648 | 3: | ||
649 | |||
650 | @@@@@@@@@@@@@@@@@@@@@@@@@@@ COMMON | ||
651 | |||
652 | add r14, r14, #4 @ p->buf++ | ||
653 | |||
654 | add r11, r12, #historybuffer @ r11 := &p->historybuffer[0] | ||
655 | |||
656 | sub r10, r14, #PREDICTOR_HISTORY_SIZE*4 | ||
657 | @ r10 := p->buf - PREDICTOR_HISTORY_SIZE | ||
658 | |||
659 | ldr r0, [sp, #4] @ r0 := remaining count | ||
660 | cmp r10, r11 | ||
661 | beq move_histm @ The history buffer is full, we need to do a memmove | ||
662 | |||
663 | @ Check loop count | ||
664 | subs r0, r0, #1 | ||
665 | strne r0, [sp, #4] | ||
666 | bne loopm | ||
667 | |||
668 | donem: | ||
669 | str r14, [r12] @ Save value of p->buf | ||
670 | add sp, sp, #8 @ Don't bother restoring r1, r2 | ||
671 | #ifdef ROCKBOX | ||
672 | ldmpc regs=r4-r11 | ||
673 | #else | ||
674 | ldmia sp!, {r4 - r11, pc} | ||
675 | #endif | ||
676 | |||
677 | move_histm: | ||
678 | @ dest = r11 (p->historybuffer) | ||
679 | @ src = r14 (p->buf) | ||
680 | @ n = 200 bytes (5 x 40) | ||
681 | |||
682 | ldmia r14!, {r0-r9} @ 40 bytes | ||
683 | stmia r11!, {r0-r9} | ||
684 | ldmia r14!, {r0-r9} @ 40 bytes | ||
685 | stmia r11!, {r0-r9} | ||
686 | ldmia r14!, {r0-r9} @ 40 bytes | ||
687 | stmia r11!, {r0-r9} | ||
688 | ldmia r14!, {r0-r9} @ 40 bytes | ||
689 | stmia r11!, {r0-r9} | ||
690 | ldmia r14!, {r0-r9} @ 40 bytes | ||
691 | stmia r11!, {r0-r9} | ||
692 | |||
693 | ldr r0, [sp, #4] @ r0 := remaining count (r0 was overwritten by the copy) | ||
694 | add r14, r12, #historybuffer @ p->buf = &p->historybuffer[0] | ||
695 | |||
696 | @ Check loop count | ||
697 | subs r0, r0, #1 | ||
698 | strne r0, [sp, #4] | ||
699 | bne loopm | ||
700 | |||
701 | b donem | ||
702 | .size predictor_decode_mono, .-predictor_decode_mono | ||
diff --git a/lib/rbcodec/codecs/demac/libdemac/predictor-cf.S b/lib/rbcodec/codecs/demac/libdemac/predictor-cf.S new file mode 100644 index 0000000000..fc1d901a59 --- /dev/null +++ b/lib/rbcodec/codecs/demac/libdemac/predictor-cf.S | |||
@@ -0,0 +1,660 @@ | |||
1 | /* | ||
2 | |||
3 | libdemac - A Monkey's Audio decoder | ||
4 | |||
5 | $Id$ | ||
6 | |||
7 | Copyright (C) Dave Chapman 2007 | ||
8 | |||
9 | Coldfire predictor copyright (C) 2007 Jens Arnold | ||
10 | |||
11 | This program is free software; you can redistribute it and/or modify | ||
12 | it under the terms of the GNU General Public License as published by | ||
13 | the Free Software Foundation; either version 2 of the License, or | ||
14 | (at your option) any later version. | ||
15 | |||
16 | This program is distributed in the hope that it will be useful, | ||
17 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
19 | GNU General Public License for more details. | ||
20 | |||
21 | You should have received a copy of the GNU General Public License | ||
22 | along with this program; if not, write to the Free Software | ||
23 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | ||
24 | |||
25 | */ | ||
26 | |||
27 | #include "demac_config.h" | ||
28 | |||
29 | /* NOTE: The following need to be kept in sync with parser.h */ | ||
30 | /* They are used below as byte displacements from p->buf, e.g. (YDELAYA-4,%a5) is the int32_t slot just before (YDELAYA,%a5). */ | ||
31 | #define YDELAYA 200 | ||
32 | #define YDELAYB 168 | ||
33 | #define XDELAYA 136 | ||
34 | #define XDELAYB 104 | ||
35 | #define YADAPTCOEFFSA 72 | ||
36 | #define XADAPTCOEFFSA 56 | ||
37 | #define YADAPTCOEFFSB 40 | ||
38 | #define XADAPTCOEFFSB 20 | ||
39 | |||
40 | /* struct predictor_t members (used below as byte displacements from p): */ | ||
41 | #define buf 0 /* int32_t* buf */ | ||
42 | |||
43 | #define YlastA 4 /* int32_t YlastA; */ | ||
44 | #define XlastA 8 /* int32_t XlastA; */ | ||
45 | |||
46 | #define YfilterB 12 /* int32_t YfilterB; */ | ||
47 | #define XfilterA 16 /* int32_t XfilterA; */ | ||
48 | |||
49 | #define XfilterB 20 /* int32_t XfilterB; */ | ||
50 | #define YfilterA 24 /* int32_t YfilterA; */ | ||
51 | |||
52 | #define YcoeffsA 28 /* int32_t YcoeffsA[4]; */ | ||
53 | #define XcoeffsA 44 /* int32_t XcoeffsA[4]; */ | ||
54 | #define YcoeffsB 60 /* int32_t YcoeffsB[5]; */ | ||
55 | #define XcoeffsB 80 /* int32_t XcoeffsB[5]; */ | ||
56 | |||
57 | #define historybuffer 100 /* int32_t historybuffer[] */ | ||
58 | |||
59 | |||
60 | .text | ||
61 | |||
62 | .align 2 | ||
63 | |||
64 | .global predictor_decode_stereo | ||
65 | .type predictor_decode_stereo,@function | ||
66 | | Decodes count stereo sample pairs in place: *decoded0 via predictor Y, *decoded1 via predictor X. | ||
67 | | void predictor_decode_stereo(struct predictor_t* p, | ||
68 | | int32_t* decoded0, | ||
69 | | int32_t* decoded1, | ||
70 | | int count) | ||
71 | | In the loop: %a3 = decoded0, %a4 = decoded1, %a5 = p->buf, %a6 = p, (%sp) = count | ||
72 | predictor_decode_stereo: | ||
73 | lea.l (-12*4,%sp), %sp | room for count + 11 saved registers | ||
74 | movem.l %d2-%d7/%a2-%a6, (4,%sp) | save %d2-%d7/%a2-%a6 above the count slot | ||
75 | |||
76 | movem.l (12*4+8,%sp), %a3-%a5 | %a3 = decoded0 | ||
77 | | %a4 = decoded1 | ||
78 | move.l %a5, (%sp) | (%sp) = count | ||
79 | |||
80 | move.l #0, %macsr | signed integer mode | ||
81 | move.l (12*4+4,%sp), %a6 | %a6 = p | ||
82 | move.l (%a6), %a5 | %a5 = p->buf | ||
83 | | NOTE(review): count is decremented only after an iteration has run, so count == 0 is presumably never passed - confirm. | ||
84 | .loop: | ||
85 | |||
86 | | ***** PREDICTOR Y ***** | ||
87 | |||
88 | | Predictor Y, Filter A | ||
89 | |||
90 | move.l (YlastA,%a6), %d3 | %d3 = p->YlastA | ||
91 | |||
92 | movem.l (YDELAYA-12,%a5), %d0-%d2 | %d0 = p->buf[YDELAYA-3] | ||
93 | | %d1 = p->buf[YDELAYA-2] | ||
94 | | %d2 = p->buf[YDELAYA-1] | ||
95 | |||
96 | move.l %d3, (YDELAYA,%a5) | p->buf[YDELAYA] = %d3 | ||
97 | |||
98 | sub.l %d3, %d2 | ||
99 | neg.l %d2 | %d2 = %d3 - %d2 | ||
100 | |||
101 | move.l %d2, (YDELAYA-4,%a5) | p->buf[YDELAYA-1] = %d2 | ||
102 | |||
103 | movem.l (YcoeffsA,%a6), %d4-%d7 | %d4 = p->YcoeffsA[0] | ||
104 | | %d5 = p->YcoeffsA[1] | ||
105 | | %d6 = p->YcoeffsA[2] | ||
106 | | %d7 = p->YcoeffsA[3] | ||
107 | |||
108 | mac.l %d3, %d4, %acc0 | %acc0 = p->buf[YDELAYA] * p->YcoeffsA[0] | ||
109 | mac.l %d2, %d5, %acc0 | %acc0 += p->buf[YDELAYA-1] * p->YcoeffsA[1] | ||
110 | mac.l %d1, %d6, %acc0 | %acc0 += p->buf[YDELAYA-2] * p->YcoeffsA[2] | ||
111 | mac.l %d0, %d7, %acc0 | %acc0 += p->buf[YDELAYA-3] * p->YcoeffsA[3] | ||
112 | |||
113 | tst.l %d2 | ||
114 | beq.s 1f | ||
115 | spl.b %d2 | pos: 0x??????ff, neg: 0x??????00 | ||
116 | extb.l %d2 | pos: 0xffffffff, neg: 0x00000000 | ||
117 | or.l #1, %d2 | pos: 0xffffffff, neg: 0x00000001 | ||
118 | 1: | %d2 = SIGN(%d2) | ||
119 | move.l %d2, (YADAPTCOEFFSA-4,%a5) | p->buf[YADAPTCOEFFSA-1] = %d2 | ||
120 | |||
121 | tst.l %d3 | ||
122 | beq.s 1f | ||
123 | spl.b %d3 | ||
124 | extb.l %d3 | ||
125 | or.l #1, %d3 | ||
126 | 1: | %d3 = SIGN(%d3) | ||
127 | move.l %d3, (YADAPTCOEFFSA,%a5) | p->buf[YADAPTCOEFFSA] = %d3 | ||
128 | |||
129 | | Predictor Y, Filter B | ||
130 | |||
131 | movem.l (YfilterB,%a6), %d2-%d3 | %d2 = p->YfilterB | ||
132 | | %d3 = p->XfilterA | ||
133 | move.l %d3, (YfilterB,%a6) | p->YfilterB = %d3 | ||
134 | |||
135 | move.l %d2, %d1 | %d1 = %d2 | ||
136 | lsl.l #5, %d2 | %d2 = %d2 * 32 | ||
137 | sub.l %d1, %d2 | %d2 -= %d1 (== 31 * old_d2) | ||
138 | asr.l #5, %d2 | %d2 >>= 5 | ||
139 | sub.l %d2, %d3 | %d3 -= %d2 | ||
140 | |||
141 | movem.l (YDELAYB-16,%a5), %d4-%d7 | %d4 = p->buf[YDELAYB-4] | ||
142 | | %d5 = p->buf[YDELAYB-3] | ||
143 | | %d6 = p->buf[YDELAYB-2] | ||
144 | | %d7 = p->buf[YDELAYB-1] | ||
145 | sub.l %d3, %d7 | ||
146 | neg.l %d7 | %d7 = %d3 - %d7 | ||
147 | |||
148 | move.l %d7, (YDELAYB-4,%a5) | p->buf[YDELAYB-1] = %d7 | ||
149 | |||
150 | movem.l (YcoeffsB,%a6), %d1-%d2/%a0-%a2 | %d1 = p->YcoeffsB[0] | ||
151 | | %d2 = p->YcoeffsB[1] | ||
152 | | %a0 = p->YcoeffsB[2] | ||
153 | | %a1 = p->YcoeffsB[3] | ||
154 | | %a2 = p->YcoeffsB[4] | ||
155 | |||
156 | mac.l %d3, %d1, %acc1 | %acc1 = p->buf[YDELAYB] * p->YcoeffsB[0] | ||
157 | mac.l %d7, %d2, %acc1 | %acc1 += p->buf[YDELAYB-1] * p->YcoeffsB[1] | ||
158 | mac.l %d6, %a0, %acc1 | %acc1 += p->buf[YDELAYB-2] * p->YcoeffsB[2] | ||
159 | mac.l %d5, %a1, %acc1 | %acc1 += p->buf[YDELAYB-3] * p->YcoeffsB[3] | ||
160 | mac.l %d4, %a2, %acc1 | %acc1 += p->buf[YDELAYB-4] * p->YcoeffsB[4] | ||
161 | |||
162 | move.l %d3, (YDELAYB, %a5) | p->buf[YDELAYB] = %d3 | ||
163 | |||
164 | tst.l %d7 | ||
165 | beq.s 1f | ||
166 | spl.b %d7 | ||
167 | extb.l %d7 | ||
168 | or.l #1, %d7 | ||
169 | 1: | %d7 = SIGN(%d7) | ||
170 | move.l %d7, (YADAPTCOEFFSB-4,%a5) | p->buf[YADAPTCOEFFSB-1] = %d7 | ||
171 | tst.l %d3 | ||
172 | beq.s 1f | ||
173 | spl.b %d3 | ||
174 | extb.l %d3 | ||
175 | or.l #1, %d3 | ||
176 | 1: | %d3 = SIGN(%d3) | ||
177 | move.l %d3, (YADAPTCOEFFSB, %a5) | p->buf[YADAPTCOEFFSB] = %d3 | ||
178 | |||
179 | | %d1, %d2, %a0, %a1, %a2 contain p->YcoeffsB[0..4] | ||
180 | | %d7, %d3 contain p->buf[YADAPTCOEFFSB-1] and p->buf[YADAPTCOEFFSB] | ||
181 | |||
182 | move.l (%a3), %d0 | %d0 = *decoded0 | ||
183 | beq.s 3f | *decoded0 == 0: skip the coefficient update | ||
184 | |||
185 | movem.l (YADAPTCOEFFSB-16,%a5), %d4-%d6 | %d4 = p->buf[YADAPTCOEFFSB-4] | ||
186 | | %d5 = p->buf[YADAPTCOEFFSB-3] | ||
187 | | %d6 = p->buf[YADAPTCOEFFSB-2] | ||
188 | |||
189 | bmi.s 1f | flags still valid here | ||
190 | |||
191 | | *decoded0 > 0 | ||
192 | |||
193 | sub.l %d3, %d1 | %d1 = p->YcoeffsB[0] - p->buf[YADAPTCOEFFSB] | ||
194 | sub.l %d7, %d2 | %d2 = p->YcoeffsB[1] - p->buf[YADAPTCOEFFSB-1] | ||
195 | sub.l %d6, %a0 | %a0 = p->YcoeffsB[2] - p->buf[YADAPTCOEFFSB-2] | ||
196 | sub.l %d5, %a1 | %a1 = p->YcoeffsB[3] - p->buf[YADAPTCOEFFSB-3] | ||
197 | sub.l %d4, %a2 | %a2 = p->YcoeffsB[4] - p->buf[YADAPTCOEFFSB-4] | ||
198 | |||
199 | movem.l %d1-%d2/%a0-%a2, (YcoeffsB,%a6) | Save p->YcoeffsB[] | ||
200 | |||
201 | movem.l (YcoeffsA,%a6), %d4-%d7 | %d4 = p->YcoeffsA[0] | ||
202 | | %d5 = p->YcoeffsA[1] | ||
203 | | %d6 = p->YcoeffsA[2] | ||
204 | | %d7 = p->YcoeffsA[3] | ||
205 | |||
206 | movem.l (YADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 | ||
207 | | %d2 = p->buf[YADAPTCOEFFSA-3] | ||
208 | | %a0 = p->buf[YADAPTCOEFFSA-2] | ||
209 | | %a1 = p->buf[YADAPTCOEFFSA-1] | ||
210 | | %a2 = p->buf[YADAPTCOEFFSA] | ||
211 | |||
212 | sub.l %a2, %d4 | %d4 = p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA] | ||
213 | sub.l %a1, %d5 | %d5 = p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1] | ||
214 | sub.l %a0, %d6 | %d6 = p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2] | ||
215 | sub.l %d2, %d7 | %d7 = p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3] | ||
216 | |||
217 | bra.s 2f | ||
218 | |||
219 | 1: | *decoded0 < 0 | ||
220 | |||
221 | add.l %d3, %d1 | %d1 = p->YcoeffsB[0] + p->buf[YADAPTCOEFFSB] | ||
222 | add.l %d7, %d2 | %d2 = p->YcoeffsB[1] + p->buf[YADAPTCOEFFSB-1] | ||
223 | add.l %d6, %a0 | %a0 = p->YcoeffsB[2] + p->buf[YADAPTCOEFFSB-2] | ||
224 | add.l %d5, %a1 | %a1 = p->YcoeffsB[3] + p->buf[YADAPTCOEFFSB-3] | ||
225 | add.l %d4, %a2 | %a2 = p->YcoeffsB[4] + p->buf[YADAPTCOEFFSB-4] | ||
226 | |||
227 | movem.l %d1-%d2/%a0-%a2, (YcoeffsB,%a6) | Save p->YcoeffsB[] | ||
228 | |||
229 | movem.l (YcoeffsA,%a6), %d4-%d7 | %d4 = p->YcoeffsA[0] | ||
230 | | %d5 = p->YcoeffsA[1] | ||
231 | | %d6 = p->YcoeffsA[2] | ||
232 | | %d7 = p->YcoeffsA[3] | ||
233 | |||
234 | movem.l (YADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 | ||
235 | | %d2 = p->buf[YADAPTCOEFFSA-3] | ||
236 | | %a0 = p->buf[YADAPTCOEFFSA-2] | ||
237 | | %a1 = p->buf[YADAPTCOEFFSA-1] | ||
238 | | %a2 = p->buf[YADAPTCOEFFSA] | ||
239 | |||
240 | add.l %a2, %d4 | %d4 = p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA] | ||
241 | add.l %a1, %d5 | %d5 = p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1] | ||
242 | add.l %a0, %d6 | %d6 = p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2] | ||
243 | add.l %d2, %d7 | %d7 = p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3] | ||
244 | |||
245 | 2: | ||
246 | movem.l %d4-%d7, (YcoeffsA,%a6) | Save p->YcoeffsA[] | ||
247 | |||
248 | 3: | ||
249 | | Finish Predictor Y | ||
250 | |||
251 | movclr.l %acc0, %d1 | %d1 = predictionA | ||
252 | movclr.l %acc1, %d2 | %d2 = predictionB | ||
253 | asr.l #1, %d2 | ||
254 | add.l %d2, %d1 | %d1 += (%d2 >> 1) | ||
255 | asr.l #8, %d1 | ||
256 | asr.l #2, %d1 | %d1 >>= 10 | ||
257 | add.l %d0, %d1 | %d1 += %d0 | ||
258 | move.l %d1, (YlastA,%a6) | p->YlastA = %d1 | ||
259 | |||
260 | move.l (YfilterA,%a6), %d2 | %d2 = p->YfilterA | ||
261 | move.l %d2, %d0 | ||
262 | lsl.l #5, %d2 | ||
263 | sub.l %d0, %d2 | %d2 = 31 * %d2 | ||
264 | asr.l #5, %d2 | %d2 >>= 5 | ||
265 | add.l %d1, %d2 | ||
266 | move.l %d2, (YfilterA,%a6) | p->YfilterA = %d2 | ||
267 | |||
268 | | *decoded0 stored 2 instructions down, avoiding pipeline stall | ||
269 | |||
270 | | ***** PREDICTOR X ***** | ||
271 | |||
272 | | Predictor X, Filter A | ||
273 | |||
274 | move.l (XlastA,%a6), %d3 | %d3 = p->XlastA | ||
275 | |||
276 | move.l %d2, (%a3)+ | *(decoded0++) = %d2 (p->YfilterA) | ||
277 | |||
278 | movem.l (XDELAYA-12,%a5), %d0-%d2 | %d0 = p->buf[XDELAYA-3] | ||
279 | | %d1 = p->buf[XDELAYA-2] | ||
280 | | %d2 = p->buf[XDELAYA-1] | ||
281 | |||
282 | move.l %d3, (XDELAYA,%a5) | p->buf[XDELAYA] = %d3 | ||
283 | |||
284 | sub.l %d3, %d2 | ||
285 | neg.l %d2 | %d2 = %d3 - %d2 | ||
286 | |||
287 | move.l %d2, (XDELAYA-4,%a5) | p->buf[XDELAYA-1] = %d2 | ||
288 | |||
289 | movem.l (XcoeffsA,%a6), %d4-%d7 | %d4 = p->XcoeffsA[0] | ||
290 | | %d5 = p->XcoeffsA[1] | ||
291 | | %d6 = p->XcoeffsA[2] | ||
292 | | %d7 = p->XcoeffsA[3] | ||
293 | |||
294 | mac.l %d3, %d4, %acc0 | %acc0 = p->buf[XDELAYA] * p->XcoeffsA[0] | ||
295 | mac.l %d2, %d5, %acc0 | %acc0 += p->buf[XDELAYA-1] * p->XcoeffsA[1] | ||
296 | mac.l %d1, %d6, %acc0 | %acc0 += p->buf[XDELAYA-2] * p->XcoeffsA[2] | ||
297 | mac.l %d0, %d7, %acc0 | %acc0 += p->buf[XDELAYA-3] * p->XcoeffsA[3] | ||
298 | |||
299 | tst.l %d2 | ||
300 | beq.s 1f | ||
301 | spl.b %d2 | pos: 0x??????ff, neg: 0x??????00 | ||
302 | extb.l %d2 | pos: 0xffffffff, neg: 0x00000000 | ||
303 | or.l #1, %d2 | pos: 0xffffffff, neg: 0x00000001 | ||
304 | 1: | %d2 = SIGN(%d2) | ||
305 | move.l %d2, (XADAPTCOEFFSA-4,%a5) | p->buf[XADAPTCOEFFSA-1] = %d2 | ||
306 | |||
307 | tst.l %d3 | ||
308 | beq.s 1f | ||
309 | spl.b %d3 | ||
310 | extb.l %d3 | ||
311 | or.l #1, %d3 | ||
312 | 1: | %d3 = SIGN(%d3) | ||
313 | move.l %d3, (XADAPTCOEFFSA,%a5) | p->buf[XADAPTCOEFFSA] = %d3 | ||
314 | |||
315 | | Predictor X, Filter B | ||
316 | |||
317 | movem.l (XfilterB,%a6), %d2-%d3 | %d2 = p->XfilterB | ||
318 | | %d3 = p->YfilterA | ||
319 | move.l %d3, (XfilterB,%a6) | p->XfilterB = %d3 | ||
320 | |||
321 | move.l %d2, %d1 | %d1 = %d2 | ||
322 | lsl.l #5, %d2 | %d2 = %d2 * 32 | ||
323 | sub.l %d1, %d2 | %d2 -= %d1 (== 31 * old_d2) | ||
324 | asr.l #5, %d2 | %d2 >>= 5 | ||
325 | sub.l %d2, %d3 | %d3 -= %d2 | ||
326 | |||
327 | movem.l (XDELAYB-16,%a5), %d4-%d7 | %d4 = p->buf[XDELAYB-4] | ||
328 | | %d5 = p->buf[XDELAYB-3] | ||
329 | | %d6 = p->buf[XDELAYB-2] | ||
330 | | %d7 = p->buf[XDELAYB-1] | ||
331 | sub.l %d3, %d7 | ||
332 | neg.l %d7 | %d7 = %d3 - %d7 | ||
333 | |||
334 | move.l %d7, (XDELAYB-4,%a5) | p->buf[XDELAYB-1] = %d7 | ||
335 | |||
336 | movem.l (XcoeffsB,%a6), %d1-%d2/%a0-%a2 | %d1 = p->XcoeffsB[0] | ||
337 | | %d2 = p->XcoeffsB[1] | ||
338 | | %a0 = p->XcoeffsB[2] | ||
339 | | %a1 = p->XcoeffsB[3] | ||
340 | | %a2 = p->XcoeffsB[4] | ||
341 | |||
342 | mac.l %d3, %d1, %acc1 | %acc1 = p->buf[XDELAYB] * p->XcoeffsB[0] | ||
343 | mac.l %d7, %d2, %acc1 | %acc1 += p->buf[XDELAYB-1] * p->XcoeffsB[1] | ||
344 | mac.l %d6, %a0, %acc1 | %acc1 += p->buf[XDELAYB-2] * p->XcoeffsB[2] | ||
345 | mac.l %d5, %a1, %acc1 | %acc1 += p->buf[XDELAYB-3] * p->XcoeffsB[3] | ||
346 | mac.l %d4, %a2, %acc1 | %acc1 += p->buf[XDELAYB-4] * p->XcoeffsB[4] | ||
347 | |||
348 | move.l %d3, (XDELAYB, %a5) | p->buf[XDELAYB] = %d3 | ||
349 | |||
350 | tst.l %d7 | ||
351 | beq.s 1f | ||
352 | spl.b %d7 | ||
353 | extb.l %d7 | ||
354 | or.l #1, %d7 | ||
355 | 1: | %d7 = SIGN(%d7) | ||
356 | move.l %d7, (XADAPTCOEFFSB-4,%a5) | p->buf[XADAPTCOEFFSB-1] = %d7 | ||
357 | |||
358 | tst.l %d3 | ||
359 | beq.s 1f | ||
360 | spl.b %d3 | ||
361 | extb.l %d3 | ||
362 | or.l #1, %d3 | ||
363 | 1: | %d3 = SIGN(%d3) | ||
364 | move.l %d3, (XADAPTCOEFFSB, %a5) | p->buf[XADAPTCOEFFSB] = %d3 | ||
365 | |||
366 | | %d1, %d2, %a0, %a1, %a2 contain p->XcoeffsB[0..4] | ||
367 | | %d7, %d3 contain p->buf[XADAPTCOEFFSB-1] and p->buf[XADAPTCOEFFSB] | ||
368 | |||
369 | move.l (%a4), %d0 | %d0 = *decoded1 | ||
370 | beq.s 3f | *decoded1 == 0: skip the coefficient update | ||
371 | |||
372 | movem.l (XADAPTCOEFFSB-16,%a5), %d4-%d6 | %d4 = p->buf[XADAPTCOEFFSB-4] | ||
373 | | %d5 = p->buf[XADAPTCOEFFSB-3] | ||
374 | | %d6 = p->buf[XADAPTCOEFFSB-2] | ||
375 | |||
376 | bmi.s 1f | flags still valid here | ||
377 | |||
378 | | *decoded1 > 0 | ||
379 | |||
380 | sub.l %d3, %d1 | %d1 = p->XcoeffsB[0] - p->buf[XADAPTCOEFFSB] | ||
381 | sub.l %d7, %d2 | %d2 = p->XcoeffsB[1] - p->buf[XADAPTCOEFFSB-1] | ||
382 | sub.l %d6, %a0 | %a0 = p->XcoeffsB[2] - p->buf[XADAPTCOEFFSB-2] | ||
383 | sub.l %d5, %a1 | %a1 = p->XcoeffsB[3] - p->buf[XADAPTCOEFFSB-3] | ||
384 | sub.l %d4, %a2 | %a2 = p->XcoeffsB[4] - p->buf[XADAPTCOEFFSB-4] | ||
385 | |||
386 | movem.l %d1-%d2/%a0-%a2, (XcoeffsB,%a6) | Save p->XcoeffsB[] | ||
387 | |||
388 | movem.l (XcoeffsA,%a6), %d4-%d7 | %d4 = p->XcoeffsA[0] | ||
389 | | %d5 = p->XcoeffsA[1] | ||
390 | | %d6 = p->XcoeffsA[2] | ||
391 | | %d7 = p->XcoeffsA[3] | ||
392 | |||
393 | movem.l (XADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 | ||
394 | | %d2 = p->buf[XADAPTCOEFFSA-3] | ||
395 | | %a0 = p->buf[XADAPTCOEFFSA-2] | ||
396 | | %a1 = p->buf[XADAPTCOEFFSA-1] | ||
397 | | %a2 = p->buf[XADAPTCOEFFSA] | ||
398 | |||
399 | sub.l %a2, %d4 | %d4 = p->XcoeffsA[0] - p->buf[XADAPTCOEFFSA] | ||
400 | sub.l %a1, %d5 | %d5 = p->XcoeffsA[1] - p->buf[XADAPTCOEFFSA-1] | ||
401 | sub.l %a0, %d6 | %d6 = p->XcoeffsA[2] - p->buf[XADAPTCOEFFSA-2] | ||
402 | sub.l %d2, %d7 | %d7 = p->XcoeffsA[3] - p->buf[XADAPTCOEFFSA-3] | ||
403 | |||
404 | bra.s 2f | ||
405 | |||
406 | 1: | *decoded1 < 0 | ||
407 | |||
408 | add.l %d3, %d1 | %d1 = p->XcoeffsB[0] + p->buf[XADAPTCOEFFSB] | ||
409 | add.l %d7, %d2 | %d2 = p->XcoeffsB[1] + p->buf[XADAPTCOEFFSB-1] | ||
410 | add.l %d6, %a0 | %a0 = p->XcoeffsB[2] + p->buf[XADAPTCOEFFSB-2] | ||
411 | add.l %d5, %a1 | %a1 = p->XcoeffsB[3] + p->buf[XADAPTCOEFFSB-3] | ||
412 | add.l %d4, %a2 | %a2 = p->XcoeffsB[4] + p->buf[XADAPTCOEFFSB-4] | ||
413 | |||
414 | movem.l %d1-%d2/%a0-%a2, (XcoeffsB,%a6) | Save p->XcoeffsB[] | ||
415 | |||
416 | movem.l (XcoeffsA,%a6), %d4-%d7 | %d4 = p->XcoeffsA[0] | ||
417 | | %d5 = p->XcoeffsA[1] | ||
418 | | %d6 = p->XcoeffsA[2] | ||
419 | | %d7 = p->XcoeffsA[3] | ||
420 | |||
421 | movem.l (XADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 | ||
422 | | %d2 = p->buf[XADAPTCOEFFSA-3] | ||
423 | | %a0 = p->buf[XADAPTCOEFFSA-2] | ||
424 | | %a1 = p->buf[XADAPTCOEFFSA-1] | ||
425 | | %a2 = p->buf[XADAPTCOEFFSA] | ||
426 | |||
427 | add.l %a2, %d4 | %d4 = p->XcoeffsA[0] + p->buf[XADAPTCOEFFSA] | ||
428 | add.l %a1, %d5 | %d5 = p->XcoeffsA[1] + p->buf[XADAPTCOEFFSA-1] | ||
429 | add.l %a0, %d6 | %d6 = p->XcoeffsA[2] + p->buf[XADAPTCOEFFSA-2] | ||
430 | add.l %d2, %d7 | %d7 = p->XcoeffsA[3] + p->buf[XADAPTCOEFFSA-3] | ||
431 | |||
432 | 2: | ||
433 | movem.l %d4-%d7, (XcoeffsA,%a6) | Save p->XcoeffsA[] | ||
434 | |||
435 | 3: | ||
436 | | Finish Predictor X | ||
437 | |||
438 | movclr.l %acc0, %d1 | %d1 = predictionA | ||
439 | movclr.l %acc1, %d2 | %d2 = predictionB | ||
440 | asr.l #1, %d2 | ||
441 | add.l %d2, %d1 | %d1 += (%d2 >> 1) | ||
442 | asr.l #8, %d1 | ||
443 | asr.l #2, %d1 | %d1 >>= 10 | ||
444 | add.l %d0, %d1 | %d1 += %d0 | ||
445 | move.l %d1, (XlastA,%a6) | p->XlastA = %d1 | ||
446 | |||
447 | move.l (XfilterA,%a6), %d2 | %d2 = p->XfilterA | ||
448 | move.l %d2, %d0 | ||
449 | lsl.l #5, %d2 | ||
450 | sub.l %d0, %d2 | %d2 = 31 * %d2 | ||
451 | asr.l #5, %d2 | %d2 >>= 5 | ||
452 | add.l %d1, %d2 | ||
453 | move.l %d2, (XfilterA,%a6) | p->XfilterA = %d2 | ||
454 | |||
455 | | *decoded1 stored 3 instructions down, avoiding pipeline stall | ||
456 | |||
457 | | ***** COMMON ***** | ||
458 | |||
459 | addq.l #4, %a5 | p->buf++ | ||
460 | lea.l (historybuffer+PREDICTOR_HISTORY_SIZE*4,%a6), %a2 | ||
461 | | %a2 = &p->historybuffer[PREDICTOR_HISTORY_SIZE] | ||
462 | |||
463 | move.l %d2, (%a4)+ | *(decoded1++) = %d2 (p->XfilterA) | ||
464 | |||
465 | cmp.l %a2, %a5 | ||
466 | beq.s .move_hist | History buffer is full, we need to do a memmove | ||
467 | |||
468 | subq.l #1, (%sp) | decrease loop count | ||
469 | bne.w .loop | ||
470 | |||
471 | .done: | ||
472 | move.l %a5, (%a6) | Save value of p->buf | ||
473 | movem.l (4,%sp), %d2-%d7/%a2-%a6 | restore the registers saved on entry | ||
474 | lea.l (12*4,%sp), %sp | ||
475 | rts | ||
476 | |||
477 | .move_hist: | ||
478 | lea.l (historybuffer,%a6), %a2 | ||
479 | |||
480 | | dest = %a2 (p->historybuffer) | ||
481 | | src = %a5 (p->buf) | ||
482 | | n = 200 bytes (5 x 40) | ||
483 | |||
484 | movem.l (%a5), %d0-%d7/%a0-%a1 | 40 bytes | ||
485 | movem.l %d0-%d7/%a0-%a1, (%a2) | ||
486 | movem.l (40,%a5), %d0-%d7/%a0-%a1 | 40 bytes | ||
487 | movem.l %d0-%d7/%a0-%a1, (40,%a2) | ||
488 | movem.l (80,%a5), %d0-%d7/%a0-%a1 | 40 bytes | ||
489 | movem.l %d0-%d7/%a0-%a1, (80,%a2) | ||
490 | movem.l (120,%a5), %d0-%d7/%a0-%a1 | 40 bytes | ||
491 | movem.l %d0-%d7/%a0-%a1, (120,%a2) | ||
492 | movem.l (160,%a5), %d0-%d7/%a0-%a1 | 40 bytes | ||
493 | movem.l %d0-%d7/%a0-%a1, (160,%a2) | ||
494 | |||
495 | move.l %a2, %a5 | p->buf = &p->historybuffer[0] | ||
496 | |||
497 | subq.l #1, (%sp) | decrease loop count | ||
498 | bne.w .loop | ||
499 | |||
500 | bra.s .done | ||
501 | .size predictor_decode_stereo, .-predictor_decode_stereo | ||
502 | |||
503 | |||
504 | .global predictor_decode_mono | ||
505 | .type predictor_decode_mono,@function | ||
506 | | Mono predictor: replaces each of the 'count' values at decoded0 with the filtered output (p->YfilterA), in place. | ||
507 | | void predictor_decode_mono(struct predictor_t* p, | ||
508 | | int32_t* decoded0, | ||
509 | | int count) | ||
510 | |||
511 | predictor_decode_mono: | ||
512 | lea.l (-11*4,%sp), %sp | reserve stack space for 11 registers | ||
513 | movem.l %d2-%d7/%a2-%a6, (%sp) | save %d2-%d7/%a2-%a6 (restored at .donem) | ||
514 | |||
515 | move.l #0, %macsr | signed integer mode | ||
516 | |||
517 | move.l (11*4+4,%sp), %a6 | %a6 = p | ||
518 | move.l (11*4+8,%sp), %a4 | %a4 = decoded0 | ||
519 | move.l (11*4+12,%sp), %d7 | %d7 = count | ||
520 | move.l (%a6), %a5 | %a5 = p->buf | ||
521 | |||
522 | move.l (YlastA,%a6), %d3 | %d3 = p->YlastA | ||
523 | |||
524 | .loopm: | ||
525 | |||
526 | | ***** PREDICTOR ***** | ||
527 | |||
528 | movem.l (YDELAYA-12,%a5), %d0-%d2 | %d0 = p->buf[YDELAYA-3] | ||
529 | | %d1 = p->buf[YDELAYA-2] | ||
530 | | %d2 = p->buf[YDELAYA-1] | ||
531 | |||
532 | move.l %d3, (YDELAYA,%a5) | p->buf[YDELAYA] = %d3 | ||
533 | |||
534 | sub.l %d3, %d2 | ||
535 | neg.l %d2 | %d2 = %d3 - %d2 | ||
536 | |||
537 | move.l %d2, (YDELAYA-4,%a5) | p->buf[YDELAYA-1] = %d2 | ||
538 | |||
539 | movem.l (YcoeffsA,%a6), %a0-%a3 | %a0 = p->YcoeffsA[0] | ||
540 | | %a1 = p->YcoeffsA[1] | ||
541 | | %a2 = p->YcoeffsA[2] | ||
542 | | %a3 = p->YcoeffsA[3] | ||
543 | |||
544 | mac.l %d3, %a0, %acc0 | %acc0 = p->buf[YDELAYA] * p->YcoeffsA[0] | ||
545 | mac.l %d2, %a1, %acc0 | %acc0 += p->buf[YDELAYA-1] * p->YcoeffsA[1] | ||
546 | mac.l %d1, %a2, %acc0 | %acc0 += p->buf[YDELAYA-2] * p->YcoeffsA[2] | ||
547 | mac.l %d0, %a3, %acc0 | %acc0 += p->buf[YDELAYA-3] * p->YcoeffsA[3] | ||
548 | |||
549 | tst.l %d2 | ||
550 | beq.s 1f | ||
551 | spl.b %d2 | pos: 0x??????ff, neg: 0x??????00 | ||
552 | extb.l %d2 | pos: 0xffffffff, neg: 0x00000000 | ||
553 | or.l #1, %d2 | pos: 0xffffffff, neg: 0x00000001 | ||
554 | 1: | %d2 = SIGN(%d2) | ||
555 | move.l %d2, (YADAPTCOEFFSA-4,%a5) | p->buf[YADAPTCOEFFSA-1] = %d2 | ||
556 | |||
557 | tst.l %d3 | ||
558 | beq.s 1f | ||
559 | spl.b %d3 | ||
560 | extb.l %d3 | ||
561 | or.l #1, %d3 | ||
562 | 1: | %d3 = SIGN(%d3) | ||
563 | move.l %d3, (YADAPTCOEFFSA,%a5) | p->buf[YADAPTCOEFFSA] = %d3 | ||
564 | |||
565 | move.l (%a4), %d0 | %d0 = *decoded0 | ||
566 | beq.s 3f | *decoded0 == 0: leave p->YcoeffsA[] untouched | ||
567 | |||
568 | movem.l (YADAPTCOEFFSA-12,%a5),%d4-%d5 | %d4 = p->buf[YADAPTCOEFFSA-3] | ||
569 | | %d5 = p->buf[YADAPTCOEFFSA-2] | ||
570 | |||
571 | bmi.s 1f | flags still valid here | ||
572 | |||
573 | | *decoded0 > 0 | ||
574 | |||
575 | sub.l %d3, %a0 | %a0 = p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA] | ||
576 | sub.l %d2, %a1 | %a1 = p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1] | ||
577 | sub.l %d5, %a2 | %a2 = p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2] | ||
578 | sub.l %d4, %a3 | %a3 = p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3] | ||
579 | |||
580 | bra.s 2f | ||
581 | |||
582 | 1: | *decoded0 < 0 | ||
583 | |||
584 | add.l %d3, %a0 | %a0 = p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA] | ||
585 | add.l %d2, %a1 | %a1 = p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1] | ||
586 | add.l %d5, %a2 | %a2 = p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2] | ||
587 | add.l %d4, %a3 | %a3 = p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3] | ||
588 | |||
589 | 2: | ||
590 | movem.l %a0-%a3, (YcoeffsA,%a6) | save p->YcoeffsA[] | ||
591 | |||
592 | 3: | ||
593 | | Finish Predictor | ||
594 | |||
595 | movclr.l %acc0, %d3 | %d3 = predictionA | ||
596 | asr.l #8, %d3 | ||
597 | asr.l #2, %d3 | %d3 >>= 10 | ||
598 | add.l %d0, %d3 | %d3 += %d0 | ||
599 | |||
600 | move.l (YfilterA,%a6), %d2 | %d2 = p->YfilterA | ||
601 | move.l %d2, %d0 | ||
602 | lsl.l #5, %d2 | ||
603 | sub.l %d0, %d2 | %d2 = 31 * %d2 | ||
604 | asr.l #5, %d2 | %d2 >>= 5 | ||
605 | add.l %d3, %d2 | ||
606 | move.l %d2, (YfilterA,%a6) | p->YfilterA = %d2 | ||
607 | |||
608 | | *decoded0 stored 3 instructions down, avoiding pipeline stall | ||
609 | |||
610 | | ***** COMMON ***** | ||
611 | |||
612 | addq.l #4, %a5 | p->buf++ | ||
613 | lea.l (historybuffer+PREDICTOR_HISTORY_SIZE*4,%a6), %a3 | ||
614 | | %a3 = &p->historybuffer[PREDICTOR_HISTORY_SIZE] | ||
615 | |||
616 | move.l %d2, (%a4)+ | *(decoded0++) = %d2 (p->YfilterA) | ||
617 | |||
618 | cmp.l %a3, %a5 | ||
619 | beq.s .move_histm | History buffer is full, we need to do a memmove | ||
620 | |||
621 | subq.l #1, %d7 | decrease loop count | ||
622 | bne.w .loopm | ||
623 | |||
624 | move.l %d3, (YlastA,%a6) | p->YlastA = %d3 | ||
625 | |||
626 | .donem: | ||
627 | move.l %a5, (%a6) | Save value of p->buf | ||
628 | movem.l (%sp), %d2-%d7/%a2-%a6 | ||
629 | lea.l (11*4,%sp), %sp | ||
630 | rts | ||
631 | |||
632 | .move_histm: | ||
633 | move.l %d3, (YlastA,%a6) | p->YlastA = %d3 (%d3 is overwritten by the copy below) | ||
634 | |||
635 | lea.l (historybuffer,%a6), %a3 | ||
636 | |||
637 | | dest = %a3 (p->historybuffer) | ||
638 | | src = %a5 (p->buf) | ||
639 | | n = 200 | ||
640 | |||
641 | movem.l (%a5), %d0-%d6/%a0-%a2 | 40 bytes | ||
642 | movem.l %d0-%d6/%a0-%a2, (%a3) | ||
643 | movem.l (40,%a5), %d0-%d6/%a0-%a2 | 40 bytes | ||
644 | movem.l %d0-%d6/%a0-%a2, (40,%a3) | ||
645 | movem.l (80,%a5), %d0-%d6/%a0-%a2 | 40 bytes | ||
646 | movem.l %d0-%d6/%a0-%a2, (80,%a3) | ||
647 | movem.l (120,%a5), %d0-%d6/%a0-%a2 | 40 bytes | ||
648 | movem.l %d0-%d6/%a0-%a2, (120,%a3) | ||
649 | movem.l (160,%a5), %d0-%d6/%a0-%a2 | 40 bytes | ||
650 | movem.l %d0-%d6/%a0-%a2, (160,%a3) | ||
651 | |||
652 | move.l %a3, %a5 | p->buf = &p->historybuffer[0] | ||
653 | |||
654 | move.l (YlastA,%a6), %d3 | %d3 = p->YlastA | ||
655 | |||
656 | subq.l #1, %d7 | decrease loop count | ||
657 | bne.w .loopm | ||
658 | |||
659 | bra.s .donem | ||
660 | .size predictor_decode_mono, .-predictor_decode_mono | ||
diff --git a/lib/rbcodec/codecs/demac/libdemac/predictor.c b/lib/rbcodec/codecs/demac/libdemac/predictor.c new file mode 100644 index 0000000000..45912dddbd --- /dev/null +++ b/lib/rbcodec/codecs/demac/libdemac/predictor.c | |||
@@ -0,0 +1,271 @@ | |||
1 | /* | ||
2 | |||
3 | libdemac - A Monkey's Audio decoder | ||
4 | |||
5 | $Id$ | ||
6 | |||
7 | Copyright (C) Dave Chapman 2007 | ||
8 | |||
9 | This program is free software; you can redistribute it and/or modify | ||
10 | it under the terms of the GNU General Public License as published by | ||
11 | the Free Software Foundation; either version 2 of the License, or | ||
12 | (at your option) any later version. | ||
13 | |||
14 | This program is distributed in the hope that it will be useful, | ||
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | GNU General Public License for more details. | ||
18 | |||
19 | You should have received a copy of the GNU General Public License | ||
20 | along with this program; if not, write to the Free Software | ||
21 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | ||
22 | |||
23 | */ | ||
24 | |||
25 | #include <inttypes.h> | ||
26 | #include <string.h> | ||
27 | |||
28 | #include "parser.h" | ||
29 | #include "predictor.h" | ||
30 | #include "demac_config.h" | ||
31 | |||
/* Return 0 if x is zero, -1 if x is positive, 1 if x is negative.
 *
 * Note the deliberately inverted sense (positive input gives -1).
 * The whole expansion is parenthesized so the macro can be used inside a
 * larger expression (e.g. "2 * SIGN(v)" or "SIGN(v) + 1") without the
 * conditional operator capturing its neighbours.  The argument may be
 * evaluated twice, so it must not have side effects. */
#define SIGN(x) ((x) ? (((x) > 0) ? -1 : 1) : 0)
34 | |||
/* Start-up values of the four "A" coefficients; init_predictor_decoder()
   copies them into both YcoeffsA and XcoeffsA. */
static const int32_t initial_coeffs[4] = { 360, 317, -109, 98 };

/* Offsets into p->buf, counted in int32_t elements.

   Each xADAPTCOEFFSx value is the index of the newest SIGN() entry of one
   filter; older entries sit at decreasing indices (the "A" filters read four
   entries, the "B" filters five). */
#define XADAPTCOEFFSB (5)
#define YADAPTCOEFFSB (10)
#define XADAPTCOEFFSA (14)
#define YADAPTCOEFFSA (18)

/* Each xDELAYx value is the index of the newest sample of one delay line;
   the four delay lines are spaced PREDICTOR_ORDER entries apart. */
#define XDELAYB (18 + PREDICTOR_ORDER)
#define XDELAYA (18 + PREDICTOR_ORDER*2)
#define YDELAYB (18 + PREDICTOR_ORDER*3)
#define YDELAYA (18 + PREDICTOR_ORDER*4)
48 | |||
49 | void init_predictor_decoder(struct predictor_t* p) | ||
50 | { | ||
51 | /* Zero the history buffers */ | ||
52 | memset(p->historybuffer, 0, PREDICTOR_SIZE * sizeof(int32_t)); | ||
53 | p->buf = p->historybuffer; | ||
54 | |||
55 | /* Initialise and zero the co-efficients */ | ||
56 | memcpy(p->YcoeffsA, initial_coeffs, sizeof(initial_coeffs)); | ||
57 | memcpy(p->XcoeffsA, initial_coeffs, sizeof(initial_coeffs)); | ||
58 | memset(p->YcoeffsB, 0, sizeof(p->YcoeffsB)); | ||
59 | memset(p->XcoeffsB, 0, sizeof(p->XcoeffsB)); | ||
60 | |||
61 | p->YfilterA = 0; | ||
62 | p->YfilterB = 0; | ||
63 | p->YlastA = 0; | ||
64 | |||
65 | p->XfilterA = 0; | ||
66 | p->XfilterB = 0; | ||
67 | p->XlastA = 0; | ||
68 | } | ||
69 | |||
70 | #if !defined(CPU_ARM) && !defined(CPU_COLDFIRE) | ||
71 | void ICODE_ATTR_DEMAC predictor_decode_stereo(struct predictor_t* p, | ||
72 | int32_t* decoded0, | ||
73 | int32_t* decoded1, | ||
74 | int count) | ||
75 | { | ||
76 | int32_t predictionA, predictionB; | ||
77 | |||
78 | while (LIKELY(count--)) | ||
79 | { | ||
80 | /* Predictor Y */ | ||
81 | p->buf[YDELAYA] = p->YlastA; | ||
82 | p->buf[YADAPTCOEFFSA] = SIGN(p->buf[YDELAYA]); | ||
83 | |||
84 | p->buf[YDELAYA-1] = p->buf[YDELAYA] - p->buf[YDELAYA-1]; | ||
85 | p->buf[YADAPTCOEFFSA-1] = SIGN(p->buf[YDELAYA-1]); | ||
86 | |||
87 | predictionA = (p->buf[YDELAYA] * p->YcoeffsA[0]) + | ||
88 | (p->buf[YDELAYA-1] * p->YcoeffsA[1]) + | ||
89 | (p->buf[YDELAYA-2] * p->YcoeffsA[2]) + | ||
90 | (p->buf[YDELAYA-3] * p->YcoeffsA[3]); | ||
91 | |||
92 | /* Apply a scaled first-order filter compression */ | ||
93 | p->buf[YDELAYB] = p->XfilterA - ((p->YfilterB * 31) >> 5); | ||
94 | p->buf[YADAPTCOEFFSB] = SIGN(p->buf[YDELAYB]); | ||
95 | p->YfilterB = p->XfilterA; | ||
96 | |||
97 | p->buf[YDELAYB-1] = p->buf[YDELAYB] - p->buf[YDELAYB-1]; | ||
98 | p->buf[YADAPTCOEFFSB-1] = SIGN(p->buf[YDELAYB-1]); | ||
99 | |||
100 | predictionB = (p->buf[YDELAYB] * p->YcoeffsB[0]) + | ||
101 | (p->buf[YDELAYB-1] * p->YcoeffsB[1]) + | ||
102 | (p->buf[YDELAYB-2] * p->YcoeffsB[2]) + | ||
103 | (p->buf[YDELAYB-3] * p->YcoeffsB[3]) + | ||
104 | (p->buf[YDELAYB-4] * p->YcoeffsB[4]); | ||
105 | |||
106 | p->YlastA = *decoded0 + ((predictionA + (predictionB >> 1)) >> 10); | ||
107 | p->YfilterA = p->YlastA + ((p->YfilterA * 31) >> 5); | ||
108 | |||
109 | /* Predictor X */ | ||
110 | |||
111 | p->buf[XDELAYA] = p->XlastA; | ||
112 | p->buf[XADAPTCOEFFSA] = SIGN(p->buf[XDELAYA]); | ||
113 | p->buf[XDELAYA-1] = p->buf[XDELAYA] - p->buf[XDELAYA-1]; | ||
114 | p->buf[XADAPTCOEFFSA-1] = SIGN(p->buf[XDELAYA-1]); | ||
115 | |||
116 | predictionA = (p->buf[XDELAYA] * p->XcoeffsA[0]) + | ||
117 | (p->buf[XDELAYA-1] * p->XcoeffsA[1]) + | ||
118 | (p->buf[XDELAYA-2] * p->XcoeffsA[2]) + | ||
119 | (p->buf[XDELAYA-3] * p->XcoeffsA[3]); | ||
120 | |||
121 | /* Apply a scaled first-order filter compression */ | ||
122 | p->buf[XDELAYB] = p->YfilterA - ((p->XfilterB * 31) >> 5); | ||
123 | p->buf[XADAPTCOEFFSB] = SIGN(p->buf[XDELAYB]); | ||
124 | p->XfilterB = p->YfilterA; | ||
125 | p->buf[XDELAYB-1] = p->buf[XDELAYB] - p->buf[XDELAYB-1]; | ||
126 | p->buf[XADAPTCOEFFSB-1] = SIGN(p->buf[XDELAYB-1]); | ||
127 | |||
128 | predictionB = (p->buf[XDELAYB] * p->XcoeffsB[0]) + | ||
129 | (p->buf[XDELAYB-1] * p->XcoeffsB[1]) + | ||
130 | (p->buf[XDELAYB-2] * p->XcoeffsB[2]) + | ||
131 | (p->buf[XDELAYB-3] * p->XcoeffsB[3]) + | ||
132 | (p->buf[XDELAYB-4] * p->XcoeffsB[4]); | ||
133 | |||
134 | p->XlastA = *decoded1 + ((predictionA + (predictionB >> 1)) >> 10); | ||
135 | p->XfilterA = p->XlastA + ((p->XfilterA * 31) >> 5); | ||
136 | |||
137 | if (LIKELY(*decoded0 != 0)) | ||
138 | { | ||
139 | if (*decoded0 > 0) | ||
140 | { | ||
141 | p->YcoeffsA[0] -= p->buf[YADAPTCOEFFSA]; | ||
142 | p->YcoeffsA[1] -= p->buf[YADAPTCOEFFSA-1]; | ||
143 | p->YcoeffsA[2] -= p->buf[YADAPTCOEFFSA-2]; | ||
144 | p->YcoeffsA[3] -= p->buf[YADAPTCOEFFSA-3]; | ||
145 | |||
146 | p->YcoeffsB[0] -= p->buf[YADAPTCOEFFSB]; | ||
147 | p->YcoeffsB[1] -= p->buf[YADAPTCOEFFSB-1]; | ||
148 | p->YcoeffsB[2] -= p->buf[YADAPTCOEFFSB-2]; | ||
149 | p->YcoeffsB[3] -= p->buf[YADAPTCOEFFSB-3]; | ||
150 | p->YcoeffsB[4] -= p->buf[YADAPTCOEFFSB-4]; | ||
151 | } | ||
152 | else | ||
153 | { | ||
154 | p->YcoeffsA[0] += p->buf[YADAPTCOEFFSA]; | ||
155 | p->YcoeffsA[1] += p->buf[YADAPTCOEFFSA-1]; | ||
156 | p->YcoeffsA[2] += p->buf[YADAPTCOEFFSA-2]; | ||
157 | p->YcoeffsA[3] += p->buf[YADAPTCOEFFSA-3]; | ||
158 | |||
159 | p->YcoeffsB[0] += p->buf[YADAPTCOEFFSB]; | ||
160 | p->YcoeffsB[1] += p->buf[YADAPTCOEFFSB-1]; | ||
161 | p->YcoeffsB[2] += p->buf[YADAPTCOEFFSB-2]; | ||
162 | p->YcoeffsB[3] += p->buf[YADAPTCOEFFSB-3]; | ||
163 | p->YcoeffsB[4] += p->buf[YADAPTCOEFFSB-4]; | ||
164 | } | ||
165 | } | ||
166 | |||
167 | *(decoded0++) = p->YfilterA; | ||
168 | |||
169 | if (LIKELY(*decoded1 != 0)) | ||
170 | { | ||
171 | if (*decoded1 > 0) | ||
172 | { | ||
173 | p->XcoeffsA[0] -= p->buf[XADAPTCOEFFSA]; | ||
174 | p->XcoeffsA[1] -= p->buf[XADAPTCOEFFSA-1]; | ||
175 | p->XcoeffsA[2] -= p->buf[XADAPTCOEFFSA-2]; | ||
176 | p->XcoeffsA[3] -= p->buf[XADAPTCOEFFSA-3]; | ||
177 | |||
178 | p->XcoeffsB[0] -= p->buf[XADAPTCOEFFSB]; | ||
179 | p->XcoeffsB[1] -= p->buf[XADAPTCOEFFSB-1]; | ||
180 | p->XcoeffsB[2] -= p->buf[XADAPTCOEFFSB-2]; | ||
181 | p->XcoeffsB[3] -= p->buf[XADAPTCOEFFSB-3]; | ||
182 | p->XcoeffsB[4] -= p->buf[XADAPTCOEFFSB-4]; | ||
183 | } | ||
184 | else | ||
185 | { | ||
186 | p->XcoeffsA[0] += p->buf[XADAPTCOEFFSA]; | ||
187 | p->XcoeffsA[1] += p->buf[XADAPTCOEFFSA-1]; | ||
188 | p->XcoeffsA[2] += p->buf[XADAPTCOEFFSA-2]; | ||
189 | p->XcoeffsA[3] += p->buf[XADAPTCOEFFSA-3]; | ||
190 | |||
191 | p->XcoeffsB[0] += p->buf[XADAPTCOEFFSB]; | ||
192 | p->XcoeffsB[1] += p->buf[XADAPTCOEFFSB-1]; | ||
193 | p->XcoeffsB[2] += p->buf[XADAPTCOEFFSB-2]; | ||
194 | p->XcoeffsB[3] += p->buf[XADAPTCOEFFSB-3]; | ||
195 | p->XcoeffsB[4] += p->buf[XADAPTCOEFFSB-4]; | ||
196 | } | ||
197 | } | ||
198 | |||
199 | *(decoded1++) = p->XfilterA; | ||
200 | |||
201 | /* Combined */ | ||
202 | p->buf++; | ||
203 | |||
204 | /* Have we filled the history buffer? */ | ||
205 | if (UNLIKELY(p->buf == p->historybuffer + PREDICTOR_HISTORY_SIZE)) { | ||
206 | memmove(p->historybuffer, p->buf, | ||
207 | PREDICTOR_SIZE * sizeof(int32_t)); | ||
208 | p->buf = p->historybuffer; | ||
209 | } | ||
210 | } | ||
211 | } | ||
212 | |||
213 | void ICODE_ATTR_DEMAC predictor_decode_mono(struct predictor_t* p, | ||
214 | int32_t* decoded0, | ||
215 | int count) | ||
216 | { | ||
217 | int32_t predictionA, currentA, A; | ||
218 | |||
219 | currentA = p->YlastA; | ||
220 | |||
221 | while (LIKELY(count--)) | ||
222 | { | ||
223 | A = *decoded0; | ||
224 | |||
225 | p->buf[YDELAYA] = currentA; | ||
226 | p->buf[YDELAYA-1] = p->buf[YDELAYA] - p->buf[YDELAYA-1]; | ||
227 | |||
228 | predictionA = (p->buf[YDELAYA] * p->YcoeffsA[0]) + | ||
229 | (p->buf[YDELAYA-1] * p->YcoeffsA[1]) + | ||
230 | (p->buf[YDELAYA-2] * p->YcoeffsA[2]) + | ||
231 | (p->buf[YDELAYA-3] * p->YcoeffsA[3]); | ||
232 | |||
233 | currentA = A + (predictionA >> 10); | ||
234 | |||
235 | p->buf[YADAPTCOEFFSA] = SIGN(p->buf[YDELAYA]); | ||
236 | p->buf[YADAPTCOEFFSA-1] = SIGN(p->buf[YDELAYA-1]); | ||
237 | |||
238 | if (LIKELY(A != 0)) | ||
239 | { | ||
240 | if (A > 0) | ||
241 | { | ||
242 | p->YcoeffsA[0] -= p->buf[YADAPTCOEFFSA]; | ||
243 | p->YcoeffsA[1] -= p->buf[YADAPTCOEFFSA-1]; | ||
244 | p->YcoeffsA[2] -= p->buf[YADAPTCOEFFSA-2]; | ||
245 | p->YcoeffsA[3] -= p->buf[YADAPTCOEFFSA-3]; | ||
246 | } | ||
247 | else | ||
248 | { | ||
249 | p->YcoeffsA[0] += p->buf[YADAPTCOEFFSA]; | ||
250 | p->YcoeffsA[1] += p->buf[YADAPTCOEFFSA-1]; | ||
251 | p->YcoeffsA[2] += p->buf[YADAPTCOEFFSA-2]; | ||
252 | p->YcoeffsA[3] += p->buf[YADAPTCOEFFSA-3]; | ||
253 | } | ||
254 | } | ||
255 | |||
256 | p->buf++; | ||
257 | |||
258 | /* Have we filled the history buffer? */ | ||
259 | if (UNLIKELY(p->buf == p->historybuffer + PREDICTOR_HISTORY_SIZE)) { | ||
260 | memmove(p->historybuffer, p->buf, | ||
261 | PREDICTOR_SIZE * sizeof(int32_t)); | ||
262 | p->buf = p->historybuffer; | ||
263 | } | ||
264 | |||
265 | p->YfilterA = currentA + ((p->YfilterA * 31) >> 5); | ||
266 | *(decoded0++) = p->YfilterA; | ||
267 | } | ||
268 | |||
269 | p->YlastA = currentA; | ||
270 | } | ||
271 | #endif | ||
diff --git a/lib/rbcodec/codecs/demac/libdemac/predictor.h b/lib/rbcodec/codecs/demac/libdemac/predictor.h new file mode 100644 index 0000000000..6a0a81983b --- /dev/null +++ b/lib/rbcodec/codecs/demac/libdemac/predictor.h | |||
@@ -0,0 +1,38 @@ | |||
1 | /* | ||
2 | |||
3 | libdemac - A Monkey's Audio decoder | ||
4 | |||
5 | $Id$ | ||
6 | |||
7 | Copyright (C) Dave Chapman 2007 | ||
8 | |||
9 | This program is free software; you can redistribute it and/or modify | ||
10 | it under the terms of the GNU General Public License as published by | ||
11 | the Free Software Foundation; either version 2 of the License, or | ||
12 | (at your option) any later version. | ||
13 | |||
14 | This program is distributed in the hope that it will be useful, | ||
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | GNU General Public License for more details. | ||
18 | |||
19 | You should have received a copy of the GNU General Public License | ||
20 | along with this program; if not, write to the Free Software | ||
21 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | ||
22 | |||
23 | */ | ||
24 | |||
25 | #ifndef _APE_PREDICTOR_H | ||
26 | #define _APE_PREDICTOR_H | ||
27 | |||
28 | #include <inttypes.h> | ||
29 | #include "parser.h" | ||
30 | #include "filter.h" | ||
31 | |||
/* Reset *p: history window, coefficients and filter state. */
void init_predictor_decoder(struct predictor_t* p);

/* Run the stereo predictor over `count` samples; the values at decoded0
   and decoded1 are replaced in place by the filtered outputs. */
void predictor_decode_stereo(struct predictor_t* p,
                             int32_t* decoded0,
                             int32_t* decoded1,
                             int count);

/* Run the mono predictor over `count` samples; the values at decoded0 are
   replaced in place by the filtered output. */
void predictor_decode_mono(struct predictor_t* p,
                           int32_t* decoded0,
                           int count);
37 | |||
38 | #endif | ||
diff --git a/lib/rbcodec/codecs/demac/libdemac/udiv32_arm-pre.S b/lib/rbcodec/codecs/demac/libdemac/udiv32_arm-pre.S new file mode 100644 index 0000000000..459cab8240 --- /dev/null +++ b/lib/rbcodec/codecs/demac/libdemac/udiv32_arm-pre.S | |||
@@ -0,0 +1,25 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * $Id$ | ||
9 | * | ||
10 | * Copyright (C) 2010 by Andrew Mahone | ||
11 | * | ||
12 | * Wrapper for udiv32_arm.S to test available IRAM by pre-linking the codec. | ||
13 | * | ||
14 | * This program is free software; you can redistribute it and/or | ||
15 | * modify it under the terms of the GNU General Public License | ||
16 | * as published by the Free Software Foundation; either version 2 | ||
17 | * of the License, or (at your option) any later version. | ||
18 | * | ||
19 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
20 | * KIND, either express or implied. | ||
21 | * | ||
22 | ****************************************************************************/ | ||
23 | |||
24 | #define APE_PRE | ||
25 | #include "udiv32_arm.S" | ||
diff --git a/lib/rbcodec/codecs/demac/libdemac/udiv32_arm.S b/lib/rbcodec/codecs/demac/libdemac/udiv32_arm.S new file mode 100644 index 0000000000..7b851659bd --- /dev/null +++ b/lib/rbcodec/codecs/demac/libdemac/udiv32_arm.S | |||
@@ -0,0 +1,318 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * $Id$ | ||
9 | * | ||
10 | * Copyright (C) 2008 by Jens Arnold | ||
11 | * Copyright (C) 2009 by Andrew Mahone | ||
12 | * | ||
13 | * Optimised unsigned integer division for ARMv4 | ||
14 | * | ||
15 | * Based on: libgcc routines for ARM cpu, additional algorithms from ARM System | ||
16 | * Developer's Guide | ||
17 | * Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk) | ||
18 | * Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005 | ||
19 | * Free Software Foundation, Inc. | ||
20 | * | ||
21 | * This program is free software; you can redistribute it and/or | ||
22 | * modify it under the terms of the GNU General Public License | ||
23 | * as published by the Free Software Foundation; either version 2 | ||
24 | * of the License, or (at your option) any later version. | ||
25 | * | ||
26 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
27 | * KIND, either express or implied. | ||
28 | * | ||
29 | ****************************************************************************/ | ||
30 | |||
31 | #include "config.h" | ||
32 | /* On targets with codec iram, a header file will be generated after an initial | ||
33 | link of the APE codec, stating the amount of IRAM remaining for use by the | ||
34 | reciprocal lookup table. */ | ||
35 | #if !defined(APE_PRE) && defined(USE_IRAM) && ARM_ARCH < 5 | ||
36 | #include "lib/rbcodec/codecs/ape_free_iram.h" | ||
37 | #endif | ||
38 | |||
39 | /* Codecs should not normally do this, but we need to check a macro, and | ||
40 | * codecs.h would confuse the assembler. */ | ||
41 | |||
42 | #ifdef USE_IRAM | ||
43 | #define DIV_RECIP | ||
44 | .section .icode,"ax",%progbits | ||
45 | #else | ||
46 | .text | ||
47 | #endif | ||
48 | .align | ||
49 | .global udiv32_arm | ||
50 | .type udiv32_arm,%function | ||
51 | |||
52 | #if ARM_ARCH < 5 | ||
53 | /* Adapted from an algorithm given in ARM System Developer's Guide (7.3.1.2) | ||
54 | for dividing a 30-bit value by a 15-bit value, with two operations per | ||
55 | iteration by storing quotient and remainder together and adding the previous | ||
56 | quotient bit during trial subtraction. Modified to work with any dividend | ||
57 | and divisor both less than 1 << 30, and skipping trials by calculating bits | ||
58 | in output. */ | ||
59 | .macro ARM_DIV_31_BODY dividend, divisor, result, bits, curbit, quotient, remainder | ||
60 | /* \dividend and \divisor are inputs (udiv32_arm negates the divisor before invoking this); \result, \bits and \curbit are work registers; \quotient and \remainder receive the results. */ | ||
61 | mov \bits, #1 | ||
62 | /* Shift the divisor left until it aligns with the numerator. If it already | ||
63 | has the high bit set, this is fine, everything inside .rept will be | ||
64 | skipped, and the add before and adcs after will set the one-bit result | ||
65 | to zero. */ | ||
66 | cmn \divisor, \dividend, lsr #16 | ||
67 | movcs \divisor, \divisor, lsl #16 | ||
68 | addcs \bits, \bits, #16 | ||
69 | cmn \divisor, \dividend, lsr #8 | ||
70 | movcs \divisor, \divisor, lsl #8 | ||
71 | addcs \bits, \bits, #8 | ||
72 | cmn \divisor, \dividend, lsr #4 | ||
73 | movcs \divisor, \divisor, lsl #4 | ||
74 | addcs \bits, \bits, #4 | ||
75 | cmn \divisor, \dividend, lsr #2 | ||
76 | movcs \divisor, \divisor, lsl #2 | ||
77 | addcs \bits, \bits, #2 | ||
78 | cmn \divisor, \dividend, lsr #1 | ||
79 | movcs \divisor, \divisor, lsl #1 | ||
80 | addcs \bits, \bits, #1 | ||
81 | adds \result, \dividend, \divisor | ||
82 | subcc \result, \result, \divisor | ||
83 | rsb \curbit, \bits, #31 | ||
84 | add pc, pc, \curbit, lsl #3 /* skip \curbit of the two-instruction steps below (8 bytes each) */ | ||
85 | nop | ||
86 | .rept 30 | ||
87 | adcs \result, \divisor, \result, lsl #1 | ||
88 | /* Fix the remainder portion of the result. This must be done because the | ||
89 | handler for 32-bit numerators needs the remainder. */ | ||
90 | subcc \result, \result, \divisor | ||
91 | .endr | ||
92 | /* Shift remainder/quotient left one, add final quotient bit */ | ||
93 | adc \result, \result, \result | ||
94 | mov \remainder, \result, lsr \bits | ||
95 | eor \quotient, \result, \remainder, lsl \bits | ||
96 | .endm | ||
97 | |||
98 | #ifndef FREE_IRAM | ||
99 | .set recip_max, 2 | ||
100 | #else | ||
101 | /* Each table entry is one word. Since a compare is done against the maximum | ||
102 | entry as an immediate, the maximum entry must be a valid ARM immediate, | ||
103 | which means a byte shifted by an even number of places. The .rept below counts the significant bits of recip_max >> 8 by trying shifts of 16, 8, 4, 2 and 1; that count is rounded up to an even number and recip_max is then masked down to the eight bits starting at that shift. */ | ||
104 | .set recip_max, 2 + FREE_IRAM / 4 | ||
105 | .set recip_max_tmp, recip_max >> 8 | ||
106 | .set recip_mask_shift, 0 | ||
107 | .set tmp_shift, 16 | ||
108 | .rept 5 | ||
109 | .if recip_max_tmp >> tmp_shift | ||
110 | .set recip_max_tmp, recip_max_tmp >> tmp_shift | ||
111 | .set recip_mask_shift, recip_mask_shift + tmp_shift | ||
112 | .endif | ||
113 | .set tmp_shift, tmp_shift >> 1 | ||
114 | .endr | ||
115 | .if recip_max_tmp | ||
116 | .set recip_mask_shift, recip_mask_shift + 1 | ||
117 | .endif | ||
118 | .set recip_mask_shift, (recip_mask_shift + 1) & 62 | ||
119 | .set recip_max, recip_max & (255 << recip_mask_shift) | ||
120 | //.set recip_max, 2 | ||
121 | #endif | ||
122 | |||
123 | udiv32_arm: /* in: r0 = numerator, r1 = divisor; out: r0 = quotient */ | ||
124 | #ifdef DIV_RECIP | ||
125 | cmp r1, #3 | ||
126 | bcc .L_udiv_tiny /* divisor is 0, 1 or 2 */ | ||
127 | cmp r1, #recip_max | ||
128 | bhi .L_udiv /* divisor is beyond the reciprocal table */ | ||
129 | adr r3, .L_udiv_recip_table-12 /* table starts at divisor 3, one word per entry */ | ||
130 | ldr r2, [r3, r1, lsl #2] | ||
131 | mov r3, r0 | ||
132 | umull ip, r0, r2, r0 /* r0 = high word of (table entry * numerator) */ | ||
133 | mul r2, r0, r1 /* r2 = candidate quotient * divisor */ | ||
134 | cmp r3, r2 | ||
135 | bxcs lr /* numerator >= r2: candidate is the quotient */ | ||
136 | sub r0, r0, #1 /* otherwise return candidate - 1 */ | ||
137 | bx lr | ||
138 | .L_udiv_tiny: | ||
139 | cmp r1, #1 | ||
140 | movhi r0, r0, lsr #1 /* divisor == 2 */ | ||
141 | bxcs lr /* divisor == 1 or 2 */ | ||
142 | b .L_div0 /* divisor == 0 */ | ||
143 | #endif | ||
144 | .L_udiv: | ||
145 | /* Invert divisor. ARM_DIV_31_BODY uses adc to both subtract the divisor | ||
146 | and add the next bit of the result. The correction code at .L_udiv32 | ||
147 | does not need the divisor inverted, but can be modified to work with it, | ||
148 | and this allows the zero divisor test to be done early and without an | ||
149 | explicit comparison. */ | ||
150 | rsbs r1, r1, #0 | ||
151 | #ifndef DIV_RECIP | ||
152 | beq .L_div0 | ||
153 | #endif | ||
154 | tst r0, r0 | ||
155 | /* High bit must be unset, otherwise shift numerator right, calculate, | ||
156 | and correct results. As this case is very uncommon we want to avoid | ||
157 | any other delays on the main path in handling it, so the long divide | ||
158 | calls the short divide as a function. */ | ||
159 | bmi .L_udiv32 | ||
160 | .L_udiv31: | ||
161 | ARM_DIV_31_BODY r0, r1, r2, r3, ip, r0, r1 | ||
162 | bx lr | ||
163 | .L_udiv32: | ||
164 | /* store original numerator and divisor, we'll need them to correct the | ||
165 | result, */ | ||
166 | stmdb sp, { r0, r1, lr } | ||
167 | /* Call __div0 here if divisor is zero, otherwise it would report the wrong | ||
168 | address. NOTE(review): no __div0 call follows; every path to .L_udiv32 has already sent a zero divisor to .L_div0, so this sentence looks stale. */ | ||
169 | mov r0, r0, lsr #1 | ||
170 | bl .L_udiv31 | ||
171 | ldmdb sp, { r2, r3, lr } | ||
172 | /* Move the low bit of the original numerator to the carry bit */ | ||
173 | movs r2, r2, lsr #1 | ||
174 | /* Shift the remainder left one and add in the carry bit */ | ||
175 | adc r1, r1, r1 | ||
176 | /* Subtract the original divisor from the remainder, setting carry if the | ||
177 | result is non-negative */ | ||
178 | adds r1, r1, r3 | ||
179 | /* Shift quotient left one and add carry bit */ | ||
180 | adc r0, r0, r0 | ||
181 | bx lr | ||
182 | .L_div0: | ||
183 | /* __div0 expects the calling address on the top of the stack */ | ||
184 | stmdb sp!, { lr } | ||
185 | mov r0, #0 | ||
186 | #if defined(__ARM_EABI__) || !defined(USE_IRAM) | ||
187 | bl __div0 | ||
188 | #else | ||
189 | ldr pc, [pc, #-4] /* jump through the literal word that follows */ | ||
190 | .word __div0 | ||
191 | #endif | ||
192 | #ifdef DIV_RECIP | ||
193 | .L_udiv_recip_table: /* one word per divisor, from 3 up to recip_max */ | ||
194 | .set div, 3 | ||
195 | .rept recip_max - 2 | ||
196 | .if (div - 1) & div | ||
197 | .set q, 0x40000000 / div | ||
198 | .set r, (0x40000000 - (q * div))<<1 | ||
199 | .set q, q << 1 | ||
200 | .if r >= div | ||
201 | .set q, q + 1 | ||
202 | .set r, r - div | ||
203 | .endif | ||
204 | .set r, r << 1 | ||
205 | .set q, q << 1 | ||
206 | .if r >= div | ||
207 | .set q, q + 1 | ||
208 | .set r, r - div | ||
209 | .endif | ||
210 | .set q, q + 1 | ||
211 | .else | ||
212 | .set q, 0x40000000 / div * 4 | ||
213 | .endif | ||
214 | .word q | ||
215 | .set div, div+1 | ||
216 | .endr | ||
217 | #endif | ||
218 | .size udiv32_arm, . - udiv32_arm | ||
219 | |||
220 | #else | ||
221 | .macro ARMV5_UDIV32_BODY numerator, divisor, quotient, bits, inv, neg, div0label | ||
222 | cmp \numerator, \divisor | ||
223 | clz \bits, \divisor | ||
224 | bcc 30f /* numerator < divisor */ | ||
225 | mov \inv, \divisor, lsl \bits | ||
226 | add \neg, pc, \inv, lsr #25 | ||
227 | cmp \inv, #1<<31 | ||
228 | ldrhib \inv, [\neg, #.L_udiv_est_table-.-64] | ||
229 | bls 20f /* normalized divisor <= 1 << 31: power of two, or zero */ | ||
230 | subs \bits, \bits, #7 | ||
231 | rsb \neg, \divisor, #0 | ||
232 | movpl \divisor, \inv, lsl \bits | ||
233 | bmi 10f /* clz(divisor) < 7 */ | ||
234 | mul \inv, \divisor, \neg | ||
235 | smlawt \divisor, \divisor, \inv, \divisor | ||
236 | mul \inv, \divisor, \neg | ||
237 | /* This will save a cycle on ARMv6, but requires that the numerator sign | ||
238 | bit is not set (that of inv is guaranteed unset). The branch should | ||
239 | predict very well, making it typically 1 cycle, and thus both the branch | ||
240 | and test fill delay cycles for the multiplies. Based on logging of | ||
241 | numerator sizes in the APE codec, the branch is taken about 1/10^7 of | ||
242 | the time. */ | ||
243 | #if ARM_ARCH >= 6 | ||
244 | tst \numerator, \numerator | ||
245 | smmla \divisor, \divisor, \inv, \divisor | ||
246 | bmi 40f | ||
247 | smmul \inv, \numerator, \divisor | ||
248 | #else | ||
249 | mov \bits, #0 | ||
250 | smlal \bits, \divisor, \inv, \divisor | ||
251 | umull \bits, \inv, \numerator, \divisor | ||
252 | #endif | ||
253 | add \numerator, \numerator, \neg | ||
254 | mla \divisor, \inv, \neg, \numerator | ||
255 | mov \quotient, \inv | ||
256 | cmn \divisor, \neg | ||
257 | addcc \quotient, \quotient, #1 | ||
258 | addpl \quotient, \quotient, #2 | ||
259 | bx lr | ||
260 | 10: /* reached when clz(divisor) < 7 */ | ||
261 | rsb \bits, \bits, #0 | ||
262 | sub \inv, \inv, #4 | ||
263 | mov \divisor, \inv, lsr \bits | ||
264 | umull \bits, \inv, \numerator, \divisor | ||
265 | mla \divisor, \inv, \neg, \numerator | ||
266 | mov \quotient, \inv | ||
267 | cmn \neg, \divisor, lsr #1 | ||
268 | addcs \divisor, \divisor, \neg, lsl #1 | ||
269 | addcs \quotient, \quotient, #2 | ||
270 | cmn \neg, \divisor | ||
271 | addcs \quotient, \quotient, #1 | ||
272 | bx lr | ||
273 | 20: /* divisor is a power of two, or zero */ | ||
274 | .ifnc "", "\div0label" | ||
275 | rsb \bits, \bits, #31 | ||
276 | bne \div0label /* flags are still those of the cmp against 1 << 31; ne here means divisor == 0 */ | ||
277 | .endif | ||
278 | mov \quotient, \numerator, lsr \bits | ||
279 | bx lr | ||
280 | 30: /* numerator < divisor: quotient is 0 */ | ||
281 | mov \quotient, #0 | ||
282 | bx lr | ||
283 | #if ARM_ARCH >= 6 | ||
284 | 40: /* numerator has its top bit set: use the unsigned multiply instead of smmul */ | ||
285 | umull \bits, \inv, \numerator, \divisor | ||
286 | add \numerator, \numerator, \neg | ||
287 | mla \divisor, \inv, \neg, \numerator | ||
288 | mov \quotient, \inv | ||
289 | cmn \divisor, \neg | ||
290 | addcc \quotient, \quotient, #1 | ||
291 | addpl \quotient, \quotient, #2 | ||
292 | bx lr | ||
293 | #endif | ||
294 | .endm | ||
295 | |||
296 | udiv32_arm: /* in: r0 = numerator, r1 = divisor; out: r0 = quotient; r2, r3 and ip are work registers */ | ||
297 | ARMV5_UDIV32_BODY r0, r1, r0, r2, r3, ip, .L_div0 | ||
298 | .L_div0: | ||
299 | /* __div0 expects the calling address on the top of the stack */ | ||
300 | stmdb sp!, { lr } | ||
301 | mov r0, #0 | ||
302 | #if defined(__ARM_EABI__) || !defined(USE_IRAM) | ||
303 | bl __div0 | ||
304 | #else | ||
305 | ldr pc, [pc, #-4] /* jump through the literal word that follows */ | ||
306 | .word __div0 | ||
307 | #endif | ||
308 | .L_udiv_est_table: /* 64 byte entries, read by the ldrhib in ARMV5_UDIV32_BODY using (normalized divisor >> 25) - 64 as index */ | ||
309 | .byte 0xff, 0xfc, 0xf8, 0xf4, 0xf0, 0xed, 0xea, 0xe6 | ||
310 | .byte 0xe3, 0xe0, 0xdd, 0xda, 0xd7, 0xd4, 0xd2, 0xcf | ||
311 | .byte 0xcc, 0xca, 0xc7, 0xc5, 0xc3, 0xc0, 0xbe, 0xbc | ||
312 | .byte 0xba, 0xb8, 0xb6, 0xb4, 0xb2, 0xb0, 0xae, 0xac | ||
313 | .byte 0xaa, 0xa8, 0xa7, 0xa5, 0xa3, 0xa2, 0xa0, 0x9f | ||
314 | .byte 0x9d, 0x9c, 0x9a, 0x99, 0x97, 0x96, 0x94, 0x93 | ||
315 | .byte 0x92, 0x90, 0x8f, 0x8e, 0x8d, 0x8c, 0x8a, 0x89 | ||
316 | .byte 0x88, 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81 | ||
317 | #endif | ||
318 | .size udiv32_arm, . - udiv32_arm | ||
diff --git a/lib/rbcodec/codecs/demac/libdemac/vector_math16_armv5te.h b/lib/rbcodec/codecs/demac/libdemac/vector_math16_armv5te.h new file mode 100644 index 0000000000..ae7427c137 --- /dev/null +++ b/lib/rbcodec/codecs/demac/libdemac/vector_math16_armv5te.h | |||
@@ -0,0 +1,404 @@ | |||
1 | /* | ||
2 | |||
3 | libdemac - A Monkey's Audio decoder | ||
4 | |||
5 | $Id$ | ||
6 | |||
7 | Copyright (C) Dave Chapman 2007 | ||
8 | |||
9 | ARMv5te vector math copyright (C) 2008 Jens Arnold | ||
10 | |||
11 | This program is free software; you can redistribute it and/or modify | ||
12 | it under the terms of the GNU General Public License as published by | ||
13 | the Free Software Foundation; either version 2 of the License, or | ||
14 | (at your option) any later version. | ||
15 | |||
16 | This program is distributed in the hope that it will be useful, | ||
17 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
19 | GNU General Public License for more details. | ||
20 | |||
21 | You should have received a copy of the GNU General Public License | ||
22 | along with this program; if not, write to the Free Software | ||
23 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | ||
24 | |||
25 | */ | ||
26 | |||
27 | #define FUSED_VECTOR_MATH | ||
28 | |||
29 | #define REPEAT_3(x) x x x | ||
30 | #if ORDER > 16 | ||
31 | #define REPEAT_MLA(x) x x x x x x x | ||
32 | #else | ||
33 | #define REPEAT_MLA(x) x x x | ||
34 | #endif | ||
35 | |||
36 | /* Fused scalar product and vector add (fused for performance). Returns the | ||
37 | * sum of v1[i] * f2[i], using the v1 values read before they are updated, and | ||
38 | * stores v1[i] + s2[i] back into v1 (each 16 bit sum wraps independently). One pass | ||
39 | * covers 16 elements; when ORDER > 16 the pass is repeated ORDER>>4 times. Data is fetched as 32 bit words: v1 is *required* to be 32 bit aligned, and f2 and s2 | ||
40 | * must be either both 32 bit aligned or both unaligned. If either condition isn't met, the result is a data abort or incorrect results. */ | ||
41 | static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2) | ||
42 | { | ||
43 | int res; | ||
44 | #if ORDER > 16 | ||
45 | int cnt = ORDER>>4; /* number of 16 element passes */ | ||
46 | #endif | ||
47 | |||
48 | #define ADDHALFREGS(sum, s1, s2) /* Adds register */ \ | ||
49 | "mov " #s1 ", " #s1 ", ror #16 \n" /* halves straight */ \ | ||
50 | "add " #sum ", " #s1 ", " #s2 ", lsl #16 \n" /* Clobbers 's1' */ \ | ||
51 | "add " #s1 ", " #s1 ", " #s2 ", lsr #16 \n" \ | ||
52 | "mov " #s1 ", " #s1 ", lsl #16 \n" \ | ||
53 | "orr " #sum ", " #s1 ", " #sum ", lsr #16 \n" | ||
54 | |||
55 | #define ADDHALFXREGS(sum, s1, s2) /* Adds register */ \ | ||
56 | "add " #s1 ", " #s1 ", " #sum ", lsl #16 \n" /* halves across. */ \ | ||
57 | "add " #sum ", " #s2 ", " #sum ", lsr #16 \n" /* Clobbers 's1'. */ \ | ||
58 | "mov " #sum ", " #sum ", lsl #16 \n" \ | ||
59 | "orr " #sum ", " #sum ", " #s1 ", lsr #16 \n" | ||
60 | |||
61 | asm volatile ( | ||
62 | #if ORDER > 16 | ||
63 | "mov %[res], #0 \n" | ||
64 | #endif | ||
65 | "tst %[f2], #2 \n" /* bit 1 clear: f2 is 32 bit aligned */ | ||
66 | "beq 20f \n" | ||
67 | |||
68 | "10: \n" /* f2 (and, by contract, s2) starts on an odd halfword */ | ||
69 | "ldrh r4, [%[s2]], #2 \n" | ||
70 | "mov r4, r4, lsl #16 \n" /* leading s2 halfword is carried in the top half of r4 */ | ||
71 | "ldrh r3, [%[f2]], #2 \n" /* leading f2 halfword */ | ||
72 | #if ORDER > 16 | ||
73 | "mov r3, r3, lsl #16 \n" | ||
74 | "1: \n" | ||
75 | "ldmia %[v1], {r0,r1} \n" | ||
76 | "smlabt %[res], r0, r3, %[res] \n" | ||
77 | #else | ||
78 | "ldmia %[v1], {r0,r1} \n" | ||
79 | "smulbb %[res], r0, r3 \n" /* single pass: the first product initialises res */ | ||
80 | #endif | ||
81 | "ldmia %[f2]!, {r2,r3} \n" | ||
82 | "smlatb %[res], r0, r2, %[res] \n" | ||
83 | "smlabt %[res], r1, r2, %[res] \n" | ||
84 | "smlatb %[res], r1, r3, %[res] \n" | ||
85 | "ldmia %[s2]!, {r2,r5} \n" | ||
86 | ADDHALFXREGS(r0, r4, r2) | ||
87 | ADDHALFXREGS(r1, r2, r5) | ||
88 | "stmia %[v1]!, {r0,r1} \n" | ||
89 | "ldmia %[v1], {r0,r1} \n" | ||
90 | "smlabt %[res], r0, r3, %[res] \n" | ||
91 | "ldmia %[f2]!, {r2,r3} \n" | ||
92 | "smlatb %[res], r0, r2, %[res] \n" | ||
93 | "smlabt %[res], r1, r2, %[res] \n" | ||
94 | "smlatb %[res], r1, r3, %[res] \n" | ||
95 | "ldmia %[s2]!, {r2,r4} \n" | ||
96 | ADDHALFXREGS(r0, r5, r2) | ||
97 | ADDHALFXREGS(r1, r2, r4) | ||
98 | "stmia %[v1]!, {r0,r1} \n" | ||
99 | |||
100 | "ldmia %[v1], {r0,r1} \n" | ||
101 | "smlabt %[res], r0, r3, %[res] \n" | ||
102 | "ldmia %[f2]!, {r2,r3} \n" | ||
103 | "smlatb %[res], r0, r2, %[res] \n" | ||
104 | "smlabt %[res], r1, r2, %[res] \n" | ||
105 | "smlatb %[res], r1, r3, %[res] \n" | ||
106 | "ldmia %[s2]!, {r2,r5} \n" | ||
107 | ADDHALFXREGS(r0, r4, r2) | ||
108 | ADDHALFXREGS(r1, r2, r5) | ||
109 | "stmia %[v1]!, {r0,r1} \n" | ||
110 | "ldmia %[v1], {r0,r1} \n" | ||
111 | "smlabt %[res], r0, r3, %[res] \n" | ||
112 | "ldmia %[f2]!, {r2,r3} \n" | ||
113 | "smlatb %[res], r0, r2, %[res] \n" | ||
114 | "smlabt %[res], r1, r2, %[res] \n" | ||
115 | "smlatb %[res], r1, r3, %[res] \n" | ||
116 | "ldmia %[s2]!, {r2,r4} \n" | ||
117 | ADDHALFXREGS(r0, r5, r2) | ||
118 | ADDHALFXREGS(r1, r2, r4) | ||
119 | "stmia %[v1]!, {r0,r1} \n" | ||
120 | #if ORDER > 16 | ||
121 | "subs %[cnt], %[cnt], #1 \n" | ||
122 | "bne 1b \n" | ||
123 | #endif | ||
124 | "b 99f \n" | ||
125 | |||
126 | "20: \n" /* f2 is 32 bit aligned */ | ||
127 | "1: \n" | ||
128 | "ldmia %[v1], {r1,r2} \n" | ||
129 | "ldmia %[f2]!, {r3,r4} \n" | ||
130 | #if ORDER > 16 | ||
131 | "smlabb %[res], r1, r3, %[res] \n" | ||
132 | #else | ||
133 | "smulbb %[res], r1, r3 \n" /* single pass: the first product initialises res */ | ||
134 | #endif | ||
135 | "smlatt %[res], r1, r3, %[res] \n" | ||
136 | "smlabb %[res], r2, r4, %[res] \n" | ||
137 | "smlatt %[res], r2, r4, %[res] \n" | ||
138 | "ldmia %[s2]!, {r3,r4} \n" | ||
139 | ADDHALFREGS(r0, r1, r3) | ||
140 | ADDHALFREGS(r1, r2, r4) | ||
141 | "stmia %[v1]!, {r0,r1} \n" | ||
142 | |||
143 | REPEAT_3( | ||
144 | "ldmia %[v1], {r1,r2} \n" | ||
145 | "ldmia %[f2]!, {r3,r4} \n" | ||
146 | "smlabb %[res], r1, r3, %[res] \n" | ||
147 | "smlatt %[res], r1, r3, %[res] \n" | ||
148 | "smlabb %[res], r2, r4, %[res] \n" | ||
149 | "smlatt %[res], r2, r4, %[res] \n" | ||
150 | "ldmia %[s2]!, {r3,r4} \n" | ||
151 | ADDHALFREGS(r0, r1, r3) | ||
152 | ADDHALFREGS(r1, r2, r4) | ||
153 | "stmia %[v1]!, {r0,r1} \n" | ||
154 | ) | ||
155 | #if ORDER > 16 | ||
156 | "subs %[cnt], %[cnt], #1 \n" | ||
157 | "bne 1b \n" | ||
158 | #endif | ||
159 | |||
160 | "99: \n" /* common exit of both paths */ | ||
161 | : /* outputs */ | ||
162 | #if ORDER > 16 | ||
163 | [cnt]"+r"(cnt), | ||
164 | #endif | ||
165 | [v1] "+r"(v1), | ||
166 | [f2] "+r"(f2), | ||
167 | [s2] "+r"(s2), | ||
168 | [res]"=r"(res) | ||
169 | : /* inputs */ | ||
170 | : /* clobbers */ | ||
171 | "r0", "r1", "r2", "r3", "r4", "r5", "cc", "memory" | ||
172 | ); | ||
173 | return res; | ||
174 | } | ||
175 | |||
176 | /* Fused scalar product and vector subtract (fused for performance). Returns the | ||
177 | * sum of v1[i] * f2[i], using the v1 values read before they are updated, and | ||
178 | * stores v1[i] - s2[i] back into v1 (each 16 bit difference wraps independently). One pass | ||
179 | * covers 16 elements; when ORDER > 16 the pass is repeated ORDER>>4 times. Data is fetched as 32 bit words: v1 is *required* to be 32 bit aligned, and f2 and s2 | ||
180 | * must be either both 32 bit aligned or both unaligned. If either condition isn't met, the result is a data abort or incorrect results. */ | ||
181 | static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2) | ||
182 | { | ||
183 | int res; | ||
184 | #if ORDER > 16 | ||
185 | int cnt = ORDER>>4; /* number of 16 element passes */ | ||
186 | #endif | ||
187 | |||
188 | #define SUBHALFREGS(dif, s1, s2) /* Subtracts reg. */ \ | ||
189 | "mov " #s1 ", " #s1 ", ror #16 \n" /* halves straight */ \ | ||
190 | "sub " #dif ", " #s1 ", " #s2 ", lsl #16 \n" /* Clobbers 's1' */ \ | ||
191 | "sub " #s1 ", " #s1 ", " #s2 ", lsr #16 \n" \ | ||
192 | "mov " #s1 ", " #s1 ", lsl #16 \n" \ | ||
193 | "orr " #dif ", " #s1 ", " #dif ", lsr #16 \n" | ||
194 | |||
195 | #define SUBHALFXREGS(dif, s1, s2, msk) /* Subtracts reg. */ \ | ||
196 | "sub " #s1 ", " #dif ", " #s1 ", lsr #16 \n" /* halves across. */ \ | ||
197 | "and " #s1 ", " #s1 ", " #msk " \n" /* Needs msk = */ \ | ||
198 | "rsb " #dif ", " #s2 ", " #dif ", lsr #16 \n" /* 0x0000ffff, */ \ | ||
199 | "orr " #dif ", " #s1 ", " #dif ", lsl #16 \n" /* clobbers 's1'. */ | ||
200 | |||
201 | asm volatile ( | ||
202 | #if ORDER > 16 | ||
203 | "mov %[res], #0 \n" | ||
204 | #endif | ||
205 | "tst %[f2], #2 \n" /* bit 1 clear: f2 is 32 bit aligned */ | ||
206 | "beq 20f \n" | ||
207 | |||
208 | "10: \n" /* f2 (and, by contract, s2) starts on an odd halfword */ | ||
209 | "mov r6, #0xff \n" | ||
210 | "orr r6, r6, #0xff00 \n" /* r6 = 0x0000ffff, the mask SUBHALFXREGS needs */ | ||
211 | "ldrh r4, [%[s2]], #2 \n" | ||
212 | "mov r4, r4, lsl #16 \n" /* leading s2 halfword is carried in the top half of r4 */ | ||
213 | "ldrh r3, [%[f2]], #2 \n" /* leading f2 halfword */ | ||
214 | #if ORDER > 16 | ||
215 | "mov r3, r3, lsl #16 \n" | ||
216 | "1: \n" | ||
217 | "ldmia %[v1], {r0,r1} \n" | ||
218 | "smlabt %[res], r0, r3, %[res] \n" | ||
219 | #else | ||
220 | "ldmia %[v1], {r0,r1} \n" | ||
221 | "smulbb %[res], r0, r3 \n" /* single pass: the first product initialises res */ | ||
222 | #endif | ||
223 | "ldmia %[f2]!, {r2,r3} \n" | ||
224 | "smlatb %[res], r0, r2, %[res] \n" | ||
225 | "smlabt %[res], r1, r2, %[res] \n" | ||
226 | "smlatb %[res], r1, r3, %[res] \n" | ||
227 | "ldmia %[s2]!, {r2,r5} \n" | ||
228 | SUBHALFXREGS(r0, r4, r2, r6) | ||
229 | SUBHALFXREGS(r1, r2, r5, r6) | ||
230 | "stmia %[v1]!, {r0,r1} \n" | ||
231 | "ldmia %[v1], {r0,r1} \n" | ||
232 | "smlabt %[res], r0, r3, %[res] \n" | ||
233 | "ldmia %[f2]!, {r2,r3} \n" | ||
234 | "smlatb %[res], r0, r2, %[res] \n" | ||
235 | "smlabt %[res], r1, r2, %[res] \n" | ||
236 | "smlatb %[res], r1, r3, %[res] \n" | ||
237 | "ldmia %[s2]!, {r2,r4} \n" | ||
238 | SUBHALFXREGS(r0, r5, r2, r6) | ||
239 | SUBHALFXREGS(r1, r2, r4, r6) | ||
240 | "stmia %[v1]!, {r0,r1} \n" | ||
241 | |||
242 | "ldmia %[v1], {r0,r1} \n" | ||
243 | "smlabt %[res], r0, r3, %[res] \n" | ||
244 | "ldmia %[f2]!, {r2,r3} \n" | ||
245 | "smlatb %[res], r0, r2, %[res] \n" | ||
246 | "smlabt %[res], r1, r2, %[res] \n" | ||
247 | "smlatb %[res], r1, r3, %[res] \n" | ||
248 | "ldmia %[s2]!, {r2,r5} \n" | ||
249 | SUBHALFXREGS(r0, r4, r2, r6) | ||
250 | SUBHALFXREGS(r1, r2, r5, r6) | ||
251 | "stmia %[v1]!, {r0,r1} \n" | ||
252 | "ldmia %[v1], {r0,r1} \n" | ||
253 | "smlabt %[res], r0, r3, %[res] \n" | ||
254 | "ldmia %[f2]!, {r2,r3} \n" | ||
255 | "smlatb %[res], r0, r2, %[res] \n" | ||
256 | "smlabt %[res], r1, r2, %[res] \n" | ||
257 | "smlatb %[res], r1, r3, %[res] \n" | ||
258 | "ldmia %[s2]!, {r2,r4} \n" | ||
259 | SUBHALFXREGS(r0, r5, r2, r6) | ||
260 | SUBHALFXREGS(r1, r2, r4, r6) | ||
261 | "stmia %[v1]!, {r0,r1} \n" | ||
262 | #if ORDER > 16 | ||
263 | "subs %[cnt], %[cnt], #1 \n" | ||
264 | "bne 1b \n" | ||
265 | #endif | ||
266 | "b 99f \n" | ||
267 | |||
268 | "20: \n" /* f2 is 32 bit aligned */ | ||
269 | "1: \n" | ||
270 | "ldmia %[v1], {r1,r2} \n" | ||
271 | "ldmia %[f2]!, {r3,r4} \n" | ||
272 | #if ORDER > 16 | ||
273 | "smlabb %[res], r1, r3, %[res] \n" | ||
274 | #else | ||
275 | "smulbb %[res], r1, r3 \n" /* single pass: the first product initialises res */ | ||
276 | #endif | ||
277 | "smlatt %[res], r1, r3, %[res] \n" | ||
278 | "smlabb %[res], r2, r4, %[res] \n" | ||
279 | "smlatt %[res], r2, r4, %[res] \n" | ||
280 | "ldmia %[s2]!, {r3,r4} \n" | ||
281 | SUBHALFREGS(r0, r1, r3) | ||
282 | SUBHALFREGS(r1, r2, r4) | ||
283 | "stmia %[v1]!, {r0,r1} \n" | ||
284 | |||
285 | REPEAT_3( | ||
286 | "ldmia %[v1], {r1,r2} \n" | ||
287 | "ldmia %[f2]!, {r3,r4} \n" | ||
288 | "smlabb %[res], r1, r3, %[res] \n" | ||
289 | "smlatt %[res], r1, r3, %[res] \n" | ||
290 | "smlabb %[res], r2, r4, %[res] \n" | ||
291 | "smlatt %[res], r2, r4, %[res] \n" | ||
292 | "ldmia %[s2]!, {r3,r4} \n" | ||
293 | SUBHALFREGS(r0, r1, r3) | ||
294 | SUBHALFREGS(r1, r2, r4) | ||
295 | "stmia %[v1]!, {r0,r1} \n" | ||
296 | ) | ||
297 | #if ORDER > 16 | ||
298 | "subs %[cnt], %[cnt], #1 \n" | ||
299 | "bne 1b \n" | ||
300 | #endif | ||
301 | |||
302 | "99: \n" /* common exit of both paths */ | ||
303 | : /* outputs */ | ||
304 | #if ORDER > 16 | ||
305 | [cnt]"+r"(cnt), | ||
306 | #endif | ||
307 | [v1] "+r"(v1), | ||
308 | [f2] "+r"(f2), | ||
309 | [s2] "+r"(s2), | ||
310 | [res]"=r"(res) | ||
311 | : /* inputs */ | ||
312 | : /* clobbers */ | ||
313 | "r0", "r1", "r2", "r3", "r4", "r5", "r6", "cc", "memory" | ||
314 | ); | ||
315 | return res; | ||
316 | } | ||
317 | |||
318 | /* Returns the sum of v1[i] * v2[i] over 16 bit elements; neither vector is written. One pass covers 32 elements when ORDER > 16 and 16 otherwise, and is repeated ORDER>>5 times when ORDER > 32. | ||
319 | * This version fetches data as 32 bit words, and *requires* v1 to be 32 bit aligned, otherwise it will result either in a data abort, or | ||
320 | * incorrect results (if ARM aligncheck is disabled). v2 may start on either halfword of a word; bit 1 of its address selects the code path. */ | ||
321 | static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) | ||
322 | { | ||
323 | int res; | ||
324 | #if ORDER > 32 | ||
325 | int cnt = ORDER>>5; /* number of 32 element passes */ | ||
326 | #endif | ||
327 | |||
328 | asm volatile ( | ||
329 | #if ORDER > 32 | ||
330 | "mov %[res], #0 \n" | ||
331 | #endif | ||
332 | "tst %[v2], #2 \n" /* bit 1 clear: v2 is 32 bit aligned */ | ||
333 | "beq 20f \n" | ||
334 | |||
335 | "10: \n" /* v2 starts on an odd halfword */ | ||
336 | "ldrh r3, [%[v2]], #2 \n" /* leading v2 halfword; v2 is word aligned afterwards */ | ||
337 | #if ORDER > 32 | ||
338 | "mov r3, r3, lsl #16 \n" | ||
339 | "1: \n" | ||
340 | "ldmia %[v1]!, {r0,r1} \n" | ||
341 | "smlabt %[res], r0, r3, %[res] \n" | ||
342 | #else | ||
343 | "ldmia %[v1]!, {r0,r1} \n" | ||
344 | "smulbb %[res], r0, r3 \n" /* single pass: the first product initialises res */ | ||
345 | #endif | ||
346 | "ldmia %[v2]!, {r2,r3} \n" | ||
347 | "smlatb %[res], r0, r2, %[res] \n" | ||
348 | "smlabt %[res], r1, r2, %[res] \n" | ||
349 | "smlatb %[res], r1, r3, %[res] \n" | ||
350 | |||
351 | REPEAT_MLA( | ||
352 | "ldmia %[v1]!, {r0,r1} \n" | ||
353 | "smlabt %[res], r0, r3, %[res] \n" | ||
354 | "ldmia %[v2]!, {r2,r3} \n" | ||
355 | "smlatb %[res], r0, r2, %[res] \n" | ||
356 | "smlabt %[res], r1, r2, %[res] \n" | ||
357 | "smlatb %[res], r1, r3, %[res] \n" | ||
358 | ) | ||
359 | #if ORDER > 32 | ||
360 | "subs %[cnt], %[cnt], #1 \n" | ||
361 | "bne 1b \n" | ||
362 | #endif | ||
363 | "b 99f \n" | ||
364 | |||
365 | "20: \n" /* v2 is 32 bit aligned */ | ||
366 | "1: \n" | ||
367 | "ldmia %[v1]!, {r0,r1} \n" | ||
368 | "ldmia %[v2]!, {r2,r3} \n" | ||
369 | #if ORDER > 32 | ||
370 | "smlabb %[res], r0, r2, %[res] \n" | ||
371 | #else | ||
372 | "smulbb %[res], r0, r2 \n" /* single pass: the first product initialises res */ | ||
373 | #endif | ||
374 | "smlatt %[res], r0, r2, %[res] \n" | ||
375 | "smlabb %[res], r1, r3, %[res] \n" | ||
376 | "smlatt %[res], r1, r3, %[res] \n" | ||
377 | |||
378 | REPEAT_MLA( | ||
379 | "ldmia %[v1]!, {r0,r1} \n" | ||
380 | "ldmia %[v2]!, {r2,r3} \n" | ||
381 | "smlabb %[res], r0, r2, %[res] \n" | ||
382 | "smlatt %[res], r0, r2, %[res] \n" | ||
383 | "smlabb %[res], r1, r3, %[res] \n" | ||
384 | "smlatt %[res], r1, r3, %[res] \n" | ||
385 | ) | ||
386 | #if ORDER > 32 | ||
387 | "subs %[cnt], %[cnt], #1 \n" | ||
388 | "bne 1b \n" | ||
389 | #endif | ||
390 | |||
391 | "99: \n" /* common exit of both paths */ | ||
392 | : /* outputs */ | ||
393 | #if ORDER > 32 | ||
394 | [cnt]"+r"(cnt), | ||
395 | #endif | ||
396 | [v1] "+r"(v1), | ||
397 | [v2] "+r"(v2), | ||
398 | [res]"=r"(res) | ||
399 | : /* inputs */ | ||
400 | : /* clobbers */ | ||
401 | "r0", "r1", "r2", "r3", "cc", "memory" | ||
402 | ); | ||
403 | return res; | ||
404 | } | ||
diff --git a/lib/rbcodec/codecs/demac/libdemac/vector_math16_armv6.h b/lib/rbcodec/codecs/demac/libdemac/vector_math16_armv6.h new file mode 100644 index 0000000000..8d27331b62 --- /dev/null +++ b/lib/rbcodec/codecs/demac/libdemac/vector_math16_armv6.h | |||
@@ -0,0 +1,490 @@ | |||
1 | /* | ||
2 | |||
3 | libdemac - A Monkey's Audio decoder | ||
4 | |||
5 | $Id$ | ||
6 | |||
7 | Copyright (C) Dave Chapman 2007 | ||
8 | |||
9 | ARMv6 vector math copyright (C) 2008 Jens Arnold | ||
10 | |||
11 | This program is free software; you can redistribute it and/or modify | ||
12 | it under the terms of the GNU General Public License as published by | ||
13 | the Free Software Foundation; either version 2 of the License, or | ||
14 | (at your option) any later version. | ||
15 | |||
16 | This program is distributed in the hope that it will be useful, | ||
17 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
19 | GNU General Public License for more details. | ||
20 | |||
21 | You should have received a copy of the GNU General Public License | ||
22 | along with this program; if not, write to the Free Software | ||
23 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | ||
24 | |||
25 | */ | ||
26 | |||
27 | #define FUSED_VECTOR_MATH | ||
28 | |||
29 | #if ORDER > 16 | ||
30 | #define REPEAT_BLOCK(x) x x x | ||
31 | #else | ||
32 | #define REPEAT_BLOCK(x) x | ||
33 | #endif | ||
34 | |||
35 | /* Fused scalar product and vector add (fused for performance). Returns the | ||
36 | * sum of v1[i] * f2[i], using the v1 values read before they are updated, and stores the halfword-wise sadd16 sum v1[i] + s2[i] back into v1. | ||
37 | * One pass covers 32 elements when ORDER > 16 and 16 otherwise, and is repeated ORDER>>5 times when ORDER > 32. Data is fetched as 32 bit words: v1 is *required* to be 32 bit aligned, and f2 and s2 | ||
38 | * must be either both 32 bit aligned or both unaligned. If either condition isn't met, the result is a data abort or incorrect results. | ||
39 | * NOTE(review): v1 (and f2/s2 on the aligned path) is accessed with ldrd/strd - confirm that 32 bit alignment is sufficient for these on every target. */ | ||
40 | static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2) | ||
41 | { | ||
42 | int res; | ||
43 | #if ORDER > 32 | ||
44 | int cnt = ORDER>>5; /* number of 32 element passes */ | ||
45 | #endif | ||
46 | |||
47 | asm volatile ( | ||
48 | #if ORDER > 32 | ||
49 | "mov %[res], #0 \n" | ||
50 | #endif | ||
51 | "tst %[f2], #2 \n" /* bit 1 clear: f2 is 32 bit aligned */ | ||
52 | "beq 20f \n" | ||
53 | |||
54 | "10: \n" /* f2 (and, by contract, s2) starts on an odd halfword */ | ||
55 | "ldrh r3, [%[f2]], #2 \n" | ||
56 | "ldrh r6, [%[s2]], #2 \n" | ||
57 | "ldmia %[f2]!, {r2,r4} \n" | ||
58 | "mov r3, r3, lsl #16 \n" /* leading f2 halfword moved to the top half */ | ||
59 | "mov r6, r6, lsl #16 \n" /* leading s2 halfword moved to the top half */ | ||
60 | |||
61 | "1: \n" | ||
62 | "ldmia %[s2]!, {r5,r7} \n" | ||
63 | "pkhtb r3, r3, r2 \n" | ||
64 | "pkhtb r2, r2, r4 \n" | ||
65 | "ldrd r0, [%[v1]] \n" | ||
66 | "mov r5, r5, ror #16 \n" | ||
67 | "pkhtb r6, r5, r6, asr #16 \n" | ||
68 | "pkhbt r5, r5, r7, lsl #16 \n" | ||
69 | #if ORDER > 32 | ||
70 | "smladx %[res], r0, r3, %[res] \n" | ||
71 | #else | ||
72 | "smuadx %[res], r0, r3 \n" /* single pass: the first product pair initialises res */ | ||
73 | #endif | ||
74 | "smladx %[res], r1, r2, %[res] \n" | ||
75 | "ldmia %[f2]!, {r2,r3} \n" | ||
76 | "sadd16 r0, r0, r6 \n" | ||
77 | "sadd16 r1, r1, r5 \n" | ||
78 | "strd r0, [%[v1]], #8 \n" | ||
79 | |||
80 | REPEAT_BLOCK( | ||
81 | "ldmia %[s2]!, {r5,r6} \n" | ||
82 | "pkhtb r4, r4, r2 \n" | ||
83 | "pkhtb r2, r2, r3 \n" | ||
84 | "ldrd r0, [%[v1]] \n" | ||
85 | "mov r5, r5, ror #16 \n" | ||
86 | "pkhtb r7, r5, r7, asr #16 \n" | ||
87 | "pkhbt r5, r5, r6, lsl #16 \n" | ||
88 | "smladx %[res], r0, r4, %[res] \n" | ||
89 | "smladx %[res], r1, r2, %[res] \n" | ||
90 | "ldmia %[f2]!, {r2,r4} \n" | ||
91 | "sadd16 r0, r0, r7 \n" | ||
92 | "sadd16 r1, r1, r5 \n" | ||
93 | "strd r0, [%[v1]], #8 \n" | ||
94 | "ldmia %[s2]!, {r5,r7} \n" | ||
95 | "pkhtb r3, r3, r2 \n" | ||
96 | "pkhtb r2, r2, r4 \n" | ||
97 | "ldrd r0, [%[v1]] \n" | ||
98 | "mov r5, r5, ror #16 \n" | ||
99 | "pkhtb r6, r5, r6, asr #16 \n" | ||
100 | "pkhbt r5, r5, r7, lsl #16 \n" | ||
101 | "smladx %[res], r0, r3, %[res] \n" | ||
102 | "smladx %[res], r1, r2, %[res] \n" | ||
103 | "ldmia %[f2]!, {r2,r3} \n" | ||
104 | "sadd16 r0, r0, r6 \n" | ||
105 | "sadd16 r1, r1, r5 \n" | ||
106 | "strd r0, [%[v1]], #8 \n" | ||
107 | ) | ||
108 | |||
109 | "ldmia %[s2]!, {r5,r6} \n" | ||
110 | "pkhtb r4, r4, r2 \n" | ||
111 | "pkhtb r2, r2, r3 \n" | ||
112 | "ldrd r0, [%[v1]] \n" | ||
113 | "mov r5, r5, ror #16 \n" | ||
114 | "pkhtb r7, r5, r7, asr #16 \n" | ||
115 | "pkhbt r5, r5, r6, lsl #16 \n" | ||
116 | "smladx %[res], r0, r4, %[res] \n" | ||
117 | "smladx %[res], r1, r2, %[res] \n" | ||
118 | #if ORDER > 32 | ||
119 | "subs %[cnt], %[cnt], #1 \n" | ||
120 | "ldmneia %[f2]!, {r2,r4} \n" /* fetch ahead for the next pass only if one follows */ | ||
121 | "sadd16 r0, r0, r7 \n" | ||
122 | "sadd16 r1, r1, r5 \n" | ||
123 | "strd r0, [%[v1]], #8 \n" | ||
124 | "bne 1b \n" | ||
125 | #else | ||
126 | "sadd16 r0, r0, r7 \n" | ||
127 | "sadd16 r1, r1, r5 \n" | ||
128 | "strd r0, [%[v1]], #8 \n" | ||
129 | #endif | ||
130 | |||
131 | "b 99f \n" | ||
132 | |||
133 | "20: \n" /* f2 is 32 bit aligned */ | ||
134 | "ldrd r4, [%[f2]], #8 \n" | ||
135 | "ldrd r0, [%[v1]] \n" | ||
136 | |||
137 | #if ORDER > 32 | ||
138 | "1: \n" | ||
139 | "smlad %[res], r0, r4, %[res] \n" | ||
140 | #else | ||
141 | "smuad %[res], r0, r4 \n" /* single pass: the first product pair initialises res */ | ||
142 | #endif | ||
143 | "ldrd r6, [%[s2]], #8 \n" | ||
144 | "smlad %[res], r1, r5, %[res] \n" | ||
145 | "ldrd r4, [%[f2]], #8 \n" | ||
146 | "ldrd r2, [%[v1], #8] \n" | ||
147 | "sadd16 r0, r0, r6 \n" | ||
148 | "sadd16 r1, r1, r7 \n" | ||
149 | "strd r0, [%[v1]], #8 \n" | ||
150 | |||
151 | REPEAT_BLOCK( | ||
152 | "smlad %[res], r2, r4, %[res] \n" | ||
153 | "ldrd r6, [%[s2]], #8 \n" | ||
154 | "smlad %[res], r3, r5, %[res] \n" | ||
155 | "ldrd r4, [%[f2]], #8 \n" | ||
156 | "ldrd r0, [%[v1], #8] \n" | ||
157 | "sadd16 r2, r2, r6 \n" | ||
158 | "sadd16 r3, r3, r7 \n" | ||
159 | "strd r2, [%[v1]], #8 \n" | ||
160 | "smlad %[res], r0, r4, %[res] \n" | ||
161 | "ldrd r6, [%[s2]], #8 \n" | ||
162 | "smlad %[res], r1, r5, %[res] \n" | ||
163 | "ldrd r4, [%[f2]], #8 \n" | ||
164 | "ldrd r2, [%[v1], #8] \n" | ||
165 | "sadd16 r0, r0, r6 \n" | ||
166 | "sadd16 r1, r1, r7 \n" | ||
167 | "strd r0, [%[v1]], #8 \n" | ||
168 | ) | ||
169 | |||
170 | "smlad %[res], r2, r4, %[res] \n" | ||
171 | "ldrd r6, [%[s2]], #8 \n" | ||
172 | "smlad %[res], r3, r5, %[res] \n" | ||
173 | #if ORDER > 32 | ||
174 | "subs %[cnt], %[cnt], #1 \n" | ||
175 | "ldrned r4, [%[f2]], #8 \n" /* fetch ahead for the next pass only if one follows */ | ||
176 | "ldrned r0, [%[v1], #8] \n" | ||
177 | "sadd16 r2, r2, r6 \n" | ||
178 | "sadd16 r3, r3, r7 \n" | ||
179 | "strd r2, [%[v1]], #8 \n" | ||
180 | "bne 1b \n" | ||
181 | #else | ||
182 | "sadd16 r2, r2, r6 \n" | ||
183 | "sadd16 r3, r3, r7 \n" | ||
184 | "strd r2, [%[v1]], #8 \n" | ||
185 | #endif | ||
186 | |||
187 | "99: \n" /* common exit of both paths */ | ||
188 | : /* outputs */ | ||
189 | #if ORDER > 32 | ||
190 | [cnt]"+r"(cnt), | ||
191 | #endif | ||
192 | [v1] "+r"(v1), | ||
193 | [f2] "+r"(f2), | ||
194 | [s2] "+r"(s2), | ||
195 | [res]"=r"(res) | ||
196 | : /* inputs */ | ||
197 | : /* clobbers */ | ||
198 | "r0", "r1", "r2", "r3", "r4", | ||
199 | "r5", "r6", "r7", "cc", "memory" | ||
200 | ); | ||
201 | return res; | ||
202 | } | ||
203 | |||
204 | /* Fused scalar product and vector subtract (fused for performance). Returns the | ||
205 | * sum of v1[i] * f2[i], using the v1 values read before they are updated, and stores the halfword-wise ssub16 difference v1[i] - s2[i] back into v1. | ||
206 | * One pass covers 32 elements when ORDER > 16 and 16 otherwise, and is repeated ORDER>>5 times when ORDER > 32. Data is fetched as 32 bit words: v1 is *required* to be 32 bit aligned, and f2 and s2 | ||
207 | * must be either both 32 bit aligned or both unaligned. If either condition isn't met, the result is a data abort or incorrect results. | ||
208 | * NOTE(review): v1 (and f2/s2 on the aligned path) is accessed with ldrd/strd - confirm that 32 bit alignment is sufficient for these on every target. */ | ||
209 | static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2) | ||
210 | { | ||
211 | int res; | ||
212 | #if ORDER > 32 | ||
213 | int cnt = ORDER>>5; /* number of 32 element passes */ | ||
214 | #endif | ||
215 | |||
216 | asm volatile ( | ||
217 | #if ORDER > 32 | ||
218 | "mov %[res], #0 \n" | ||
219 | #endif | ||
220 | "tst %[f2], #2 \n" /* bit 1 clear: f2 is 32 bit aligned */ | ||
221 | "beq 20f \n" | ||
222 | |||
223 | "10: \n" /* f2 (and, by contract, s2) starts on an odd halfword */ | ||
224 | "ldrh r3, [%[f2]], #2 \n" | ||
225 | "ldrh r6, [%[s2]], #2 \n" | ||
226 | "ldmia %[f2]!, {r2,r4} \n" | ||
227 | "mov r3, r3, lsl #16 \n" /* leading f2 halfword moved to the top half */ | ||
228 | "mov r6, r6, lsl #16 \n" /* leading s2 halfword moved to the top half */ | ||
229 | |||
230 | "1: \n" | ||
231 | "ldmia %[s2]!, {r5,r7} \n" | ||
232 | "pkhtb r3, r3, r2 \n" | ||
233 | "pkhtb r2, r2, r4 \n" | ||
234 | "ldrd r0, [%[v1]] \n" | ||
235 | "mov r5, r5, ror #16 \n" | ||
236 | "pkhtb r6, r5, r6, asr #16 \n" | ||
237 | "pkhbt r5, r5, r7, lsl #16 \n" | ||
238 | #if ORDER > 32 | ||
239 | "smladx %[res], r0, r3, %[res] \n" | ||
240 | #else | ||
241 | "smuadx %[res], r0, r3 \n" /* single pass: the first product pair initialises res */ | ||
242 | #endif | ||
243 | "smladx %[res], r1, r2, %[res] \n" | ||
244 | "ldmia %[f2]!, {r2,r3} \n" | ||
245 | "ssub16 r0, r0, r6 \n" | ||
246 | "ssub16 r1, r1, r5 \n" | ||
247 | "strd r0, [%[v1]], #8 \n" | ||
248 | |||
249 | REPEAT_BLOCK( | ||
250 | "ldmia %[s2]!, {r5,r6} \n" | ||
251 | "pkhtb r4, r4, r2 \n" | ||
252 | "pkhtb r2, r2, r3 \n" | ||
253 | "ldrd r0, [%[v1]] \n" | ||
254 | "mov r5, r5, ror #16 \n" | ||
255 | "pkhtb r7, r5, r7, asr #16 \n" | ||
256 | "pkhbt r5, r5, r6, lsl #16 \n" | ||
257 | "smladx %[res], r0, r4, %[res] \n" | ||
258 | "smladx %[res], r1, r2, %[res] \n" | ||
259 | "ldmia %[f2]!, {r2,r4} \n" | ||
260 | "ssub16 r0, r0, r7 \n" | ||
261 | "ssub16 r1, r1, r5 \n" | ||
262 | "strd r0, [%[v1]], #8 \n" | ||
263 | "ldmia %[s2]!, {r5,r7} \n" | ||
264 | "pkhtb r3, r3, r2 \n" | ||
265 | "pkhtb r2, r2, r4 \n" | ||
266 | "ldrd r0, [%[v1]] \n" | ||
267 | "mov r5, r5, ror #16 \n" | ||
268 | "pkhtb r6, r5, r6, asr #16 \n" | ||
269 | "pkhbt r5, r5, r7, lsl #16 \n" | ||
270 | "smladx %[res], r0, r3, %[res] \n" | ||
271 | "smladx %[res], r1, r2, %[res] \n" | ||
272 | "ldmia %[f2]!, {r2,r3} \n" | ||
273 | "ssub16 r0, r0, r6 \n" | ||
274 | "ssub16 r1, r1, r5 \n" | ||
275 | "strd r0, [%[v1]], #8 \n" | ||
276 | ) | ||
277 | |||
278 | "ldmia %[s2]!, {r5,r6} \n" | ||
279 | "pkhtb r4, r4, r2 \n" | ||
280 | "pkhtb r2, r2, r3 \n" | ||
281 | "ldrd r0, [%[v1]] \n" | ||
282 | "mov r5, r5, ror #16 \n" | ||
283 | "pkhtb r7, r5, r7, asr #16 \n" | ||
284 | "pkhbt r5, r5, r6, lsl #16 \n" | ||
285 | "smladx %[res], r0, r4, %[res] \n" | ||
286 | "smladx %[res], r1, r2, %[res] \n" | ||
287 | #if ORDER > 32 | ||
288 | "subs %[cnt], %[cnt], #1 \n" | ||
289 | "ldmneia %[f2]!, {r2,r4} \n" /* fetch ahead for the next pass only if one follows */ | ||
290 | "ssub16 r0, r0, r7 \n" | ||
291 | "ssub16 r1, r1, r5 \n" | ||
292 | "strd r0, [%[v1]], #8 \n" | ||
293 | "bne 1b \n" | ||
294 | #else | ||
295 | "ssub16 r0, r0, r7 \n" | ||
296 | "ssub16 r1, r1, r5 \n" | ||
297 | "strd r0, [%[v1]], #8 \n" | ||
298 | #endif | ||
299 | |||
300 | "b 99f \n" | ||
301 | |||
302 | "20: \n" /* f2 is 32 bit aligned */ | ||
303 | "ldrd r4, [%[f2]], #8 \n" | ||
304 | "ldrd r0, [%[v1]] \n" | ||
305 | |||
306 | #if ORDER > 32 | ||
307 | "1: \n" | ||
308 | "smlad %[res], r0, r4, %[res] \n" | ||
309 | #else | ||
310 | "smuad %[res], r0, r4 \n" /* single pass: the first product pair initialises res */ | ||
311 | #endif | ||
312 | "ldrd r6, [%[s2]], #8 \n" | ||
313 | "smlad %[res], r1, r5, %[res] \n" | ||
314 | "ldrd r4, [%[f2]], #8 \n" | ||
315 | "ldrd r2, [%[v1], #8] \n" | ||
316 | "ssub16 r0, r0, r6 \n" | ||
317 | "ssub16 r1, r1, r7 \n" | ||
318 | "strd r0, [%[v1]], #8 \n" | ||
319 | |||
320 | REPEAT_BLOCK( | ||
321 | "smlad %[res], r2, r4, %[res] \n" | ||
322 | "ldrd r6, [%[s2]], #8 \n" | ||
323 | "smlad %[res], r3, r5, %[res] \n" | ||
324 | "ldrd r4, [%[f2]], #8 \n" | ||
325 | "ldrd r0, [%[v1], #8] \n" | ||
326 | "ssub16 r2, r2, r6 \n" | ||
327 | "ssub16 r3, r3, r7 \n" | ||
328 | "strd r2, [%[v1]], #8 \n" | ||
329 | "smlad %[res], r0, r4, %[res] \n" | ||
330 | "ldrd r6, [%[s2]], #8 \n" | ||
331 | "smlad %[res], r1, r5, %[res] \n" | ||
332 | "ldrd r4, [%[f2]], #8 \n" | ||
333 | "ldrd r2, [%[v1], #8] \n" | ||
334 | "ssub16 r0, r0, r6 \n" | ||
335 | "ssub16 r1, r1, r7 \n" | ||
336 | "strd r0, [%[v1]], #8 \n" | ||
337 | ) | ||
338 | |||
339 | "smlad %[res], r2, r4, %[res] \n" | ||
340 | "ldrd r6, [%[s2]], #8 \n" | ||
341 | "smlad %[res], r3, r5, %[res] \n" | ||
342 | #if ORDER > 32 | ||
343 | "subs %[cnt], %[cnt], #1 \n" | ||
344 | "ldrned r4, [%[f2]], #8 \n" /* fetch ahead for the next pass only if one follows */ | ||
345 | "ldrned r0, [%[v1], #8] \n" | ||
346 | "ssub16 r2, r2, r6 \n" | ||
347 | "ssub16 r3, r3, r7 \n" | ||
348 | "strd r2, [%[v1]], #8 \n" | ||
349 | "bne 1b \n" | ||
350 | #else | ||
351 | "ssub16 r2, r2, r6 \n" | ||
352 | "ssub16 r3, r3, r7 \n" | ||
353 | "strd r2, [%[v1]], #8 \n" | ||
354 | #endif | ||
355 | |||
356 | "99: \n" /* common exit of both paths */ | ||
357 | : /* outputs */ | ||
358 | #if ORDER > 32 | ||
359 | [cnt]"+r"(cnt), | ||
360 | #endif | ||
361 | [v1] "+r"(v1), | ||
362 | [f2] "+r"(f2), | ||
363 | [s2] "+r"(s2), | ||
364 | [res]"=r"(res) | ||
365 | : /* inputs */ | ||
366 | : /* clobbers */ | ||
367 | "r0", "r1", "r2", "r3", "r4", | ||
368 | "r5", "r6", "r7", "cc", "memory" | ||
369 | ); | ||
370 | return res; | ||
371 | } | ||
372 | |||
373 | /* Returns the sum of v1[i] * v2[i] over 16 bit elements; neither vector is written. One pass covers 32 elements when ORDER > 16 and 16 otherwise, and is repeated ORDER>>5 times when ORDER > 32. | ||
374 | * This version fetches data as 32 bit words, and *requires* v1 to be 32 bit aligned, otherwise it will result either in a data abort, or | ||
375 | * incorrect results (if ARM aligncheck is disabled). Bit 1 of v2's address selects the code path. NOTE(review): v1 and v2 are read with ldrd - confirm that 32 bit alignment is sufficient for these on every target. */ | ||
376 | static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) | ||
377 | { | ||
378 | int res; | ||
379 | #if ORDER > 32 | ||
380 | int cnt = ORDER>>5; /* number of 32 element passes */ | ||
381 | #endif | ||
382 | |||
383 | asm volatile ( | ||
384 | #if ORDER > 32 | ||
385 | "mov %[res], #0 \n" | ||
386 | #endif | ||
387 | "tst %[v2], #2 \n" /* bit 1 clear: v2 is 32 bit aligned */ | ||
388 | "beq 20f \n" | ||
389 | |||
390 | "10: \n" /* v2 starts on an odd halfword */ | ||
391 | "bic %[v2], %[v2], #2 \n" /* clear bit 1: step v2 back to the preceding 32 bit boundary */ | ||
392 | "ldmia %[v2]!, {r5-r7} \n" | ||
393 | "ldrd r0, [%[v1]], #8 \n" | ||
394 | |||
395 | "1: \n" | ||
396 | "pkhtb r3, r5, r6 \n" /* top half of r5 with bottom half of r6: two consecutive v2 elements */ | ||
397 | "ldrd r4, [%[v2]], #8 \n" | ||
398 | #if ORDER > 32 | ||
399 | "smladx %[res], r0, r3, %[res] \n" | ||
400 | #else | ||
401 | "smuadx %[res], r0, r3 \n" /* single pass: the first product pair initialises res */ | ||
402 | #endif | ||
403 | REPEAT_BLOCK( | ||
404 | "pkhtb r0, r6, r7 \n" | ||
405 | "ldrd r2, [%[v1]], #8 \n" | ||
406 | "smladx %[res], r1, r0, %[res] \n" | ||
407 | "pkhtb r1, r7, r4 \n" | ||
408 | "ldrd r6, [%[v2]], #8 \n" | ||
409 | "smladx %[res], r2, r1, %[res] \n" | ||
410 | "pkhtb r2, r4, r5 \n" | ||
411 | "ldrd r0, [%[v1]], #8 \n" | ||
412 | "smladx %[res], r3, r2, %[res] \n" | ||
413 | "pkhtb r3, r5, r6 \n" | ||
414 | "ldrd r4, [%[v2]], #8 \n" | ||
415 | "smladx %[res], r0, r3, %[res] \n" | ||
416 | ) | ||
417 | |||
418 | "pkhtb r0, r6, r7 \n" | ||
419 | "ldrd r2, [%[v1]], #8 \n" | ||
420 | "smladx %[res], r1, r0, %[res] \n" | ||
421 | "pkhtb r1, r7, r4 \n" | ||
422 | #if ORDER > 32 | ||
423 | "subs %[cnt], %[cnt], #1 \n" | ||
424 | "ldrned r6, [%[v2]], #8 \n" /* fetch ahead for the next pass only if one follows */ | ||
425 | "smladx %[res], r2, r1, %[res] \n" | ||
426 | "pkhtb r2, r4, r5 \n" | ||
427 | "ldrned r0, [%[v1]], #8 \n" | ||
428 | "smladx %[res], r3, r2, %[res] \n" | ||
429 | "bne 1b \n" | ||
430 | #else | ||
431 | "pkhtb r4, r4, r5 \n" | ||
432 | "smladx %[res], r2, r1, %[res] \n" | ||
433 | "smladx %[res], r3, r4, %[res] \n" | ||
434 | #endif | ||
435 | |||
436 | "b 99f \n" | ||
437 | |||
438 | "20: \n" /* v2 is 32 bit aligned */ | ||
439 | "ldrd r0, [%[v1]], #8 \n" | ||
440 | "ldmia %[v2]!, {r5-r7} \n" | ||
441 | |||
442 | "1: \n" | ||
443 | "ldrd r2, [%[v1]], #8 \n" | ||
444 | #if ORDER > 32 | ||
445 | "smlad %[res], r0, r5, %[res] \n" | ||
446 | #else | ||
447 | "smuad %[res], r0, r5 \n" /* single pass: the first product pair initialises res */ | ||
448 | #endif | ||
449 | REPEAT_BLOCK( | ||
450 | "ldrd r4, [%[v2]], #8 \n" | ||
451 | "smlad %[res], r1, r6, %[res] \n" | ||
452 | "ldrd r0, [%[v1]], #8 \n" | ||
453 | "smlad %[res], r2, r7, %[res] \n" | ||
454 | "ldrd r6, [%[v2]], #8 \n" | ||
455 | "smlad %[res], r3, r4, %[res] \n" | ||
456 | "ldrd r2, [%[v1]], #8 \n" | ||
457 | "smlad %[res], r0, r5, %[res] \n" | ||
458 | ) | ||
459 | |||
460 | #if ORDER > 32 | ||
461 | "ldrd r4, [%[v2]], #8 \n" | ||
462 | "smlad %[res], r1, r6, %[res] \n" | ||
463 | "subs %[cnt], %[cnt], #1 \n" | ||
464 | "ldrned r0, [%[v1]], #8 \n" /* fetch ahead for the next pass only if one follows */ | ||
465 | "smlad %[res], r2, r7, %[res] \n" | ||
466 | "ldrned r6, [%[v2]], #8 \n" | ||
467 | "smlad %[res], r3, r4, %[res] \n" | ||
468 | "bne 1b \n" | ||
469 | #else | ||
470 | "ldr r4, [%[v2]], #4 \n" /* only r4 is consumed below, so a single word is loaded */ | ||
471 | "smlad %[res], r1, r6, %[res] \n" | ||
472 | "smlad %[res], r2, r7, %[res] \n" | ||
473 | "smlad %[res], r3, r4, %[res] \n" | ||
474 | #endif | ||
475 | |||
476 | "99: \n" /* common exit of both paths */ | ||
477 | : /* outputs */ | ||
478 | #if ORDER > 32 | ||
479 | [cnt]"+r"(cnt), | ||
480 | #endif | ||
481 | [v1] "+r"(v1), | ||
482 | [v2] "+r"(v2), | ||
483 | [res]"=r"(res) | ||
484 | : /* inputs */ | ||
485 | : /* clobbers */ | ||
486 | "r0", "r1", "r2", "r3", | ||
487 | "r4", "r5", "r6", "r7", "cc", "memory" | ||
488 | ); | ||
489 | return res; | ||
490 | } | ||
diff --git a/lib/rbcodec/codecs/demac/libdemac/vector_math16_armv7.h b/lib/rbcodec/codecs/demac/libdemac/vector_math16_armv7.h new file mode 100644 index 0000000000..84afda3e5d --- /dev/null +++ b/lib/rbcodec/codecs/demac/libdemac/vector_math16_armv7.h | |||
@@ -0,0 +1,214 @@ | |||
1 | /* | ||
2 | |||
3 | libdemac - A Monkey's Audio decoder | ||
4 | |||
5 | $Id$ | ||
6 | |||
7 | Copyright (C) Dave Chapman 2007 | ||
8 | |||
9 | ARMv7 neon vector math copyright (C) 2010 Jens Arnold | ||
10 | |||
11 | This program is free software; you can redistribute it and/or modify | ||
12 | it under the terms of the GNU General Public License as published by | ||
13 | the Free Software Foundation; either version 2 of the License, or | ||
14 | (at your option) any later version. | ||
15 | |||
16 | This program is distributed in the hope that it will be useful, | ||
17 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
19 | GNU General Public License for more details. | ||
20 | |||
21 | You should have received a copy of the GNU General Public License | ||
22 | along with this program; if not, write to the Free Software | ||
23 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | ||
24 | |||
25 | */ | ||
26 | |||
27 | #define FUSED_VECTOR_MATH /* this header provides the fused vector_sp_add()/vector_sp_sub() below */ | ||
28 | |||
29 | #if ORDER > 32 | ||
30 | #define REPEAT_BLOCK(x) x x x /* 3 extra copies; with the explicit first block that is 4 * 16 = 64 elements */ | ||
31 | #elif ORDER > 16 | ||
32 | #define REPEAT_BLOCK(x) x /* 1 extra copy; with the explicit first block that is 2 * 16 = 32 elements */ | ||
33 | #else | ||
34 | #define REPEAT_BLOCK(x) /* no extra copy; only the explicit first block of 16 elements is emitted */ | ||
35 | #endif | ||
36 | |||
37 | /* Calculate scalarproduct, then add a 2nd vector (fused for performance): returns the sum of v1[i]*f2[i] using the incoming v1 values, and stores v1[i] + s2[i] back into v1. Handles ORDER elements in blocks of 16. */ | ||
38 | static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2) | ||
39 | { | ||
40 | int res; | ||
41 | #if ORDER > 64 | ||
42 | int cnt = ORDER>>6; /* loop passes; each pass handles 64 elements */ | ||
43 | #endif | ||
44 | |||
45 | asm volatile ( | ||
46 | #if ORDER > 64 | ||
47 | "vmov.i16 q0, #0 \n" /* clear the 4 x 32 bit accumulator */ | ||
48 | "1: \n" | ||
49 | "subs %[cnt], %[cnt], #1 \n" /* sets the flags tested by the bne at the loop end */ | ||
50 | #endif | ||
51 | "vld1.16 {d6-d9}, [%[f2]]! \n" /* 16 values from f2, pointer advanced */ | ||
52 | "vld1.16 {d2-d5}, [%[v1]] \n" /* 16 values from v1, no writeback: stored back below */ | ||
53 | "vld1.16 {d10-d13}, [%[s2]]! \n" /* 16 values from s2, pointer advanced */ | ||
54 | #if ORDER > 64 | ||
55 | "vmlal.s16 q0, d2, d6 \n" | ||
56 | #else | ||
57 | "vmull.s16 q0, d2, d6 \n" /* first product initialises the accumulator */ | ||
58 | #endif | ||
59 | "vmlal.s16 q0, d3, d7 \n" | ||
60 | "vmlal.s16 q0, d4, d8 \n" | ||
61 | "vmlal.s16 q0, d5, d9 \n" | ||
62 | "vadd.i16 q1, q1, q5 \n" /* v1 += s2, after the products used the old v1 */ | ||
63 | "vadd.i16 q2, q2, q6 \n" | ||
64 | "vst1.16 {d2-d5}, [%[v1]]! \n" /* write the updated v1 block, pointer advanced */ | ||
65 | |||
66 | REPEAT_BLOCK( | ||
67 | "vld1.16 {d6-d9}, [%[f2]]! \n" | ||
68 | "vld1.16 {d2-d5}, [%[v1]] \n" | ||
69 | "vld1.16 {d10-d13}, [%[s2]]! \n" | ||
70 | "vmlal.s16 q0, d2, d6 \n" | ||
71 | "vmlal.s16 q0, d3, d7 \n" | ||
72 | "vmlal.s16 q0, d4, d8 \n" | ||
73 | "vmlal.s16 q0, d5, d9 \n" | ||
74 | "vadd.i16 q1, q1, q5 \n" | ||
75 | "vadd.i16 q2, q2, q6 \n" | ||
76 | "vst1.16 {d2-d5}, [%[v1]]! \n" | ||
77 | ) | ||
78 | #if ORDER > 64 | ||
79 | "bne 1b \n" | ||
80 | #endif | ||
81 | "vpadd.i32 d0, d0, d1 \n" /* fold the 4 partial sums into 2 */ | ||
82 | "vpaddl.s32 d0, d0 \n" /* fold the 2 partial sums into 1 */ | ||
83 | "vmov.32 %[res], d0[0] \n" /* low 32 bits are the result */ | ||
84 | : /* outputs */ | ||
85 | #if ORDER > 64 | ||
86 | [cnt]"+r"(cnt), | ||
87 | #endif | ||
88 | [v1] "+r"(v1), | ||
89 | [f2] "+r"(f2), | ||
90 | [s2] "+r"(s2), | ||
91 | [res]"=r"(res) | ||
92 | : /* inputs */ | ||
93 | : /* clobbers */ | ||
94 | "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", | ||
95 | "d8", "d9", "d10", "d11", "d12", "d13", "cc", "memory" /* "cc": subs changes the flags when ORDER > 64 */ | ||
96 | ); | ||
97 | return res; | ||
98 | } | ||
99 | |||
100 | /* Calculate scalarproduct, then subtract a 2nd vector (fused for performance): returns the sum of v1[i]*f2[i] using the incoming v1 values, and stores v1[i] - s2[i] back into v1. Handles ORDER elements in blocks of 16. */ | ||
101 | static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2) | ||
102 | { | ||
103 | int res; | ||
104 | #if ORDER > 64 | ||
105 | int cnt = ORDER>>6; /* loop passes; each pass handles 64 elements */ | ||
106 | #endif | ||
107 | |||
108 | asm volatile ( | ||
109 | #if ORDER > 64 | ||
110 | "vmov.i16 q0, #0 \n" /* clear the 4 x 32 bit accumulator */ | ||
111 | "1: \n" | ||
112 | "subs %[cnt], %[cnt], #1 \n" /* sets the flags tested by the bne at the loop end */ | ||
113 | #endif | ||
114 | "vld1.16 {d6-d9}, [%[f2]]! \n" /* 16 values from f2, pointer advanced */ | ||
115 | "vld1.16 {d2-d5}, [%[v1]] \n" /* 16 values from v1, no writeback: stored back below */ | ||
116 | "vld1.16 {d10-d13}, [%[s2]]! \n" /* 16 values from s2, pointer advanced */ | ||
117 | #if ORDER > 64 | ||
118 | "vmlal.s16 q0, d2, d6 \n" | ||
119 | #else | ||
120 | "vmull.s16 q0, d2, d6 \n" /* first product initialises the accumulator */ | ||
121 | #endif | ||
122 | "vmlal.s16 q0, d3, d7 \n" | ||
123 | "vmlal.s16 q0, d4, d8 \n" | ||
124 | "vmlal.s16 q0, d5, d9 \n" | ||
125 | "vsub.i16 q1, q1, q5 \n" /* v1 -= s2, after the products used the old v1 */ | ||
126 | "vsub.i16 q2, q2, q6 \n" | ||
127 | "vst1.16 {d2-d5}, [%[v1]]! \n" /* write the updated v1 block, pointer advanced */ | ||
128 | |||
129 | REPEAT_BLOCK( | ||
130 | "vld1.16 {d6-d9}, [%[f2]]! \n" | ||
131 | "vld1.16 {d2-d5}, [%[v1]] \n" | ||
132 | "vld1.16 {d10-d13}, [%[s2]]! \n" | ||
133 | "vmlal.s16 q0, d2, d6 \n" | ||
134 | "vmlal.s16 q0, d3, d7 \n" | ||
135 | "vmlal.s16 q0, d4, d8 \n" | ||
136 | "vmlal.s16 q0, d5, d9 \n" | ||
137 | "vsub.i16 q1, q1, q5 \n" | ||
138 | "vsub.i16 q2, q2, q6 \n" | ||
139 | "vst1.16 {d2-d5}, [%[v1]]! \n" | ||
140 | ) | ||
141 | #if ORDER > 64 | ||
142 | "bne 1b \n" | ||
143 | #endif | ||
144 | "vpadd.i32 d0, d0, d1 \n" /* fold the 4 partial sums into 2 */ | ||
145 | "vpaddl.s32 d0, d0 \n" /* fold the 2 partial sums into 1 */ | ||
146 | "vmov.32 %[res], d0[0] \n" /* low 32 bits are the result */ | ||
147 | : /* outputs */ | ||
148 | #if ORDER > 64 | ||
149 | [cnt]"+r"(cnt), | ||
150 | #endif | ||
151 | [v1] "+r"(v1), | ||
152 | [f2] "+r"(f2), | ||
153 | [s2] "+r"(s2), | ||
154 | [res]"=r"(res) | ||
155 | : /* inputs */ | ||
156 | : /* clobbers */ | ||
157 | "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", | ||
158 | "d8", "d9", "d10", "d11", "d12", "d13", "cc", "memory" /* "cc": subs changes the flags when ORDER > 64 */ | ||
159 | ); | ||
160 | return res; | ||
161 | } | ||
162 | /* Return the sum of v1[i]*v2[i] over ORDER elements, processed in blocks of 16. Neither vector is modified. */ | ||
163 | static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) | ||
164 | { | ||
165 | int res; | ||
166 | #if ORDER > 64 | ||
167 | int cnt = ORDER>>6; /* loop passes; each pass handles 64 elements */ | ||
168 | #endif | ||
169 | |||
170 | asm volatile ( | ||
171 | #if ORDER > 64 | ||
172 | "vmov.i16 q0, #0 \n" /* clear the 4 x 32 bit accumulator */ | ||
173 | "1: \n" | ||
174 | "subs %[cnt], %[cnt], #1 \n" /* sets the flags tested by the bne at the loop end */ | ||
175 | #endif | ||
176 | "vld1.16 {d2-d5}, [%[v1]]! \n" /* 16 values from v1, pointer advanced */ | ||
177 | "vld1.16 {d6-d9}, [%[v2]]! \n" /* 16 values from v2, pointer advanced */ | ||
178 | #if ORDER > 64 | ||
179 | "vmlal.s16 q0, d2, d6 \n" | ||
180 | #else | ||
181 | "vmull.s16 q0, d2, d6 \n" /* first product initialises the accumulator */ | ||
182 | #endif | ||
183 | "vmlal.s16 q0, d3, d7 \n" | ||
184 | "vmlal.s16 q0, d4, d8 \n" | ||
185 | "vmlal.s16 q0, d5, d9 \n" | ||
186 | |||
187 | REPEAT_BLOCK( | ||
188 | "vld1.16 {d2-d5}, [%[v1]]! \n" | ||
189 | "vld1.16 {d6-d9}, [%[v2]]! \n" | ||
190 | "vmlal.s16 q0, d2, d6 \n" | ||
191 | "vmlal.s16 q0, d3, d7 \n" | ||
192 | "vmlal.s16 q0, d4, d8 \n" | ||
193 | "vmlal.s16 q0, d5, d9 \n" | ||
194 | ) | ||
195 | #if ORDER > 64 | ||
196 | "bne 1b \n" | ||
197 | #endif | ||
198 | "vpadd.i32 d0, d0, d1 \n" /* fold the 4 partial sums into 2 */ | ||
199 | "vpaddl.s32 d0, d0 \n" /* fold the 2 partial sums into 1 */ | ||
200 | "vmov.32 %[res], d0[0] \n" /* low 32 bits are the result */ | ||
201 | : /* outputs */ | ||
202 | #if ORDER > 64 | ||
203 | [cnt]"+r"(cnt), | ||
204 | #endif | ||
205 | [v1] "+r"(v1), | ||
206 | [v2] "+r"(v2), | ||
207 | [res]"=r"(res) | ||
208 | : /* inputs */ | ||
209 | : /* clobbers */ | ||
210 | "d0", "d1", "d2", "d3", "d4", | ||
211 | "d5", "d6", "d7", "d8", "d9", "cc", "memory" /* "cc": subs changes the flags; "memory": the asm reads v1[] and v2[] */ | ||
212 | ); | ||
213 | return res; | ||
214 | } | ||
diff --git a/lib/rbcodec/codecs/demac/libdemac/vector_math16_cf.h b/lib/rbcodec/codecs/demac/libdemac/vector_math16_cf.h new file mode 100644 index 0000000000..4d77d3be31 --- /dev/null +++ b/lib/rbcodec/codecs/demac/libdemac/vector_math16_cf.h | |||
@@ -0,0 +1,364 @@ | |||
1 | /* | ||
2 | |||
3 | libdemac - A Monkey's Audio decoder | ||
4 | |||
5 | $Id$ | ||
6 | |||
7 | Copyright (C) Dave Chapman 2007 | ||
8 | |||
9 | Coldfire vector math copyright (C) 2007 Jens Arnold | ||
10 | |||
11 | This program is free software; you can redistribute it and/or modify | ||
12 | it under the terms of the GNU General Public License as published by | ||
13 | the Free Software Foundation; either version 2 of the License, or | ||
14 | (at your option) any later version. | ||
15 | |||
16 | This program is distributed in the hope that it will be useful, | ||
17 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
19 | GNU General Public License for more details. | ||
20 | |||
21 | You should have received a copy of the GNU General Public License | ||
22 | along with this program; if not, write to the Free Software | ||
23 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | ||
24 | |||
25 | */ | ||
26 | |||
27 | #define FUSED_VECTOR_MATH /* this header provides the fused vector_sp_add()/vector_sp_sub() below */ | ||
28 | |||
29 | #define PREPARE_SCALARPRODUCT coldfire_set_macsr(0); /* signed integer mode */ | ||
30 | |||
31 | #define REPEAT_2(x) x x /* paste the argument 2 times */ | ||
32 | #define REPEAT_3(x) x x x /* paste the argument 3 times */ | ||
33 | #define REPEAT_7(x) x x x x x x x /* paste the argument 7 times */ | ||
34 | |||
35 | /* Calculate scalarproduct, then add a 2nd vector (fused for performance) | ||
36 | * This version fetches data as 32 bit words, and *recommends* v1 to be | ||
37 | * 32 bit aligned. It also assumes that f2 and s2 are either both 32 bit | ||
38 | * aligned or both unaligned. Performance will suffer if either condition | ||
39 | * isn't met. It also needs EMAC in signed integer mode. */ | ||
40 | static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2) | ||
41 | { | ||
42 | int res; | ||
43 | #if ORDER > 16 | ||
44 | int cnt = ORDER>>4; /* loop passes; each pass below handles 16 elements */ | ||
45 | #endif | ||
46 | |||
47 | #define ADDHALFREGS(s1, s2, sum) /* Add register halves straight. */ \ | ||
48 | "move.l " #s1 ", " #sum "\n" /* 's1' and 's2' can be A or D */ \ | ||
49 | "add.l " #s2 ", " #s1 "\n" /* regs, 'sum' must be a D reg. */ \ | ||
50 | "clr.w " #sum " \n" /* 's1' is clobbered! */ \ | ||
51 | "add.l " #s2 ", " #sum "\n" \ | ||
52 | "move.w " #s1 ", " #sum "\n" | ||
53 | |||
54 | #define ADDHALFXREGS(s1, s2, sum) /* Add register halves across. */ \ | ||
55 | "clr.w " #sum " \n" /* Needs 'sum' pre-swapped, swaps */ \ | ||
56 | "add.l " #s1 ", " #sum "\n" /* 's2', and clobbers 's1'. */ \ | ||
57 | "swap " #s2 " \n" /* 's1' can be an A or D reg. */ \ | ||
58 | "add.l " #s2 ", " #s1 "\n" /* 'sum' and 's2' must be D regs. */ \ | ||
59 | "move.w " #s1 ", " #sum "\n" | ||
60 | |||
61 | asm volatile ( | ||
62 | "move.l %[f2], %%d0 \n" /* test bit 1 of the f2 address */ | ||
63 | "and.l #2, %%d0 \n" | ||
64 | "jeq 20f \n" /* bit clear: f2 is 32 bit aligned, take path 20 */ | ||
65 | |||
66 | "10: \n" /* path for f2 (and, per the header comment, s2) not 32 bit aligned */ | ||
67 | "move.w (%[f2])+, %%d0 \n" /* fetch the leading halfword so later fetches are 32 bit aligned */ | ||
68 | "move.w (%[s2])+, %%d1 \n" | ||
69 | "swap %%d1 \n" /* ADDHALFXREGS needs its 'sum' register pre-swapped */ | ||
70 | "1: \n" | ||
71 | REPEAT_2( | ||
72 | "movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1 \n" | ||
73 | "mac.w %%d0l, %%d6u, (%[f2])+, %%d0, %%acc0\n" | ||
74 | "mac.w %%d0u, %%d6l, (%[s2])+, %%d2, %%acc0\n" | ||
75 | ADDHALFXREGS(%%d6, %%d2, %%d1) | ||
76 | "mac.w %%d0l, %%d7u, (%[f2])+, %%d0, %%acc0\n" | ||
77 | "mac.w %%d0u, %%d7l, (%[s2])+, %%d6, %%acc0\n" | ||
78 | "move.l %%d1, (%[v1])+ \n" | ||
79 | ADDHALFXREGS(%%d7, %%d6, %%d2) | ||
80 | "mac.w %%d0l, %%a0u, (%[f2])+, %%d0, %%acc0\n" | ||
81 | "mac.w %%d0u, %%a0l, (%[s2])+, %%d7, %%acc0\n" | ||
82 | "move.l %%d2, (%[v1])+ \n" | ||
83 | ADDHALFXREGS(%%a0, %%d7, %%d6) | ||
84 | "mac.w %%d0l, %%a1u, (%[f2])+, %%d0, %%acc0\n" | ||
85 | "mac.w %%d0u, %%a1l, (%[s2])+, %%d1, %%acc0\n" | ||
86 | "move.l %%d6, (%[v1])+ \n" | ||
87 | ADDHALFXREGS(%%a1, %%d1, %%d7) | ||
88 | "move.l %%d7, (%[v1])+ \n" | ||
89 | ) | ||
90 | |||
91 | #if ORDER > 16 | ||
92 | "subq.l #1, %[res] \n" /* the loop counter lives in the res register (see inputs) */ | ||
93 | "bne.w 1b \n" | ||
94 | #endif | ||
95 | "jra 99f \n" | ||
96 | |||
97 | "20: \n" /* path for 32 bit aligned f2 */ | ||
98 | "move.l (%[f2])+, %%d0 \n" | ||
99 | "1: \n" | ||
100 | "movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1 \n" | ||
101 | "mac.w %%d0u, %%d6u, (%[s2])+, %%d1, %%acc0\n" | ||
102 | "mac.w %%d0l, %%d6l, (%[f2])+, %%d0, %%acc0\n" | ||
103 | ADDHALFREGS(%%d6, %%d1, %%d2) | ||
104 | "mac.w %%d0u, %%d7u, (%[s2])+, %%d1, %%acc0\n" | ||
105 | "mac.w %%d0l, %%d7l, (%[f2])+, %%d0, %%acc0\n" | ||
106 | "move.l %%d2, (%[v1])+ \n" | ||
107 | ADDHALFREGS(%%d7, %%d1, %%d2) | ||
108 | "mac.w %%d0u, %%a0u, (%[s2])+, %%d1, %%acc0\n" | ||
109 | "mac.w %%d0l, %%a0l, (%[f2])+, %%d0, %%acc0\n" | ||
110 | "move.l %%d2, (%[v1])+ \n" | ||
111 | ADDHALFREGS(%%a0, %%d1, %%d2) | ||
112 | "mac.w %%d0u, %%a1u, (%[s2])+, %%d1, %%acc0\n" | ||
113 | "mac.w %%d0l, %%a1l, (%[f2])+, %%d0, %%acc0\n" | ||
114 | "move.l %%d2, (%[v1])+ \n" | ||
115 | ADDHALFREGS(%%a1, %%d1, %%d2) | ||
116 | "move.l %%d2, (%[v1])+ \n" | ||
117 | |||
118 | "movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1 \n" | ||
119 | "mac.w %%d0u, %%d6u, (%[s2])+, %%d1, %%acc0\n" | ||
120 | "mac.w %%d0l, %%d6l, (%[f2])+, %%d0, %%acc0\n" | ||
121 | ADDHALFREGS(%%d6, %%d1, %%d2) | ||
122 | "mac.w %%d0u, %%d7u, (%[s2])+, %%d1, %%acc0\n" | ||
123 | "mac.w %%d0l, %%d7l, (%[f2])+, %%d0, %%acc0\n" | ||
124 | "move.l %%d2, (%[v1])+ \n" | ||
125 | ADDHALFREGS(%%d7, %%d1, %%d2) | ||
126 | "mac.w %%d0u, %%a0u, (%[s2])+, %%d1, %%acc0\n" | ||
127 | "mac.w %%d0l, %%a0l, (%[f2])+, %%d0, %%acc0\n" | ||
128 | "move.l %%d2, (%[v1])+ \n" | ||
129 | ADDHALFREGS(%%a0, %%d1, %%d2) | ||
130 | "mac.w %%d0u, %%a1u, (%[s2])+, %%d1, %%acc0\n" | ||
131 | #if ORDER > 16 | ||
132 | "mac.w %%d0l, %%a1l, (%[f2])+, %%d0, %%acc0\n" | ||
133 | #else | ||
134 | "mac.w %%d0l, %%a1l, %%acc0 \n" /* no loop follows, so no further f2 fetch is issued */ | ||
135 | #endif | ||
136 | "move.l %%d2, (%[v1])+ \n" | ||
137 | ADDHALFREGS(%%a1, %%d1, %%d2) | ||
138 | "move.l %%d2, (%[v1])+ \n" | ||
139 | #if ORDER > 16 | ||
140 | "subq.l #1, %[res] \n" /* the loop counter lives in the res register (see inputs) */ | ||
141 | "bne.w 1b \n" | ||
142 | #endif | ||
143 | |||
144 | "99: \n" | ||
145 | "movclr.l %%acc0, %[res] \n" /* read the accumulated sum into res and clear acc0 */ | ||
146 | : /* outputs */ | ||
147 | [v1]"+a"(v1), | ||
148 | [f2]"+a"(f2), | ||
149 | [s2]"+a"(s2), | ||
150 | [res]"=d"(res) | ||
151 | : /* inputs */ | ||
152 | #if ORDER > 16 | ||
153 | [cnt]"[res]"(cnt) /* matching constraint: cnt is passed in the register used for res */ | ||
154 | #endif | ||
155 | : /* clobbers */ | ||
156 | "d0", "d1", "d2", "d6", "d7", | ||
157 | "a0", "a1", "memory" | ||
158 | |||
159 | ); | ||
160 | return res; | ||
161 | } | ||
162 | |||
163 | /* Calculate scalarproduct, then subtract a 2nd vector (fused for performance) | ||
164 | * This version fetches data as 32 bit words, and *recommends* v1 to be | ||
165 | * 32 bit aligned. It also assumes that f2 and s2 are either both 32 bit | ||
166 | * aligned or both unaligned. Performance will suffer if either condition | ||
167 | * isn't met. It also needs EMAC in signed integer mode. */ | ||
168 | static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2) | ||
169 | { | ||
170 | int res; | ||
171 | #if ORDER > 16 | ||
172 | int cnt = ORDER>>4; /* loop passes; each pass below handles 16 elements */ | ||
173 | #endif | ||
174 | |||
175 | #define SUBHALFREGS(min, sub, dif) /* Subtract register halves straight. */ \ | ||
176 | "move.l " #min ", " #dif "\n" /* 'min' can be an A or D reg */ \ | ||
177 | "sub.l " #sub ", " #min "\n" /* 'sub' and 'dif' must be D regs */ \ | ||
178 | "clr.w " #sub "\n" /* 'min' and 'sub' are clobbered! */ \ | ||
179 | "sub.l " #sub ", " #dif "\n" \ | ||
180 | "move.w " #min ", " #dif "\n" | ||
181 | |||
182 | #define SUBHALFXREGS(min, s2, s1d) /* Subtract register halves across. */ \ | ||
183 | "clr.w " #s1d "\n" /* Needs 's1d' pre-swapped, swaps */ \ | ||
184 | "sub.l " #s1d ", " #min "\n" /* 's2' and clobbers 'min'. */ \ | ||
185 | "move.l " #min ", " #s1d "\n" /* 'min' can be an A or D reg, */ \ | ||
186 | "swap " #s2 "\n" /* 's2' and 's1d' must be D regs. */ \ | ||
187 | "sub.l " #s2 ", " #min "\n" \ | ||
188 | "move.w " #min ", " #s1d "\n" | ||
189 | |||
190 | asm volatile ( | ||
191 | "move.l %[f2], %%d0 \n" /* test bit 1 of the f2 address */ | ||
192 | "and.l #2, %%d0 \n" | ||
193 | "jeq 20f \n" /* bit clear: f2 is 32 bit aligned, take path 20 */ | ||
194 | |||
195 | "10: \n" /* path for f2 (and, per the header comment, s2) not 32 bit aligned */ | ||
196 | "move.w (%[f2])+, %%d0 \n" /* fetch the leading halfword so later fetches are 32 bit aligned */ | ||
197 | "move.w (%[s2])+, %%d1 \n" | ||
198 | "swap %%d1 \n" /* SUBHALFXREGS needs its 's1d' register pre-swapped */ | ||
199 | "1: \n" | ||
200 | REPEAT_2( | ||
201 | "movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1 \n" | ||
202 | "mac.w %%d0l, %%d6u, (%[f2])+, %%d0, %%acc0\n" | ||
203 | "mac.w %%d0u, %%d6l, (%[s2])+, %%d2, %%acc0\n" | ||
204 | SUBHALFXREGS(%%d6, %%d2, %%d1) | ||
205 | "mac.w %%d0l, %%d7u, (%[f2])+, %%d0, %%acc0\n" | ||
206 | "mac.w %%d0u, %%d7l, (%[s2])+, %%d6, %%acc0\n" | ||
207 | "move.l %%d1, (%[v1])+ \n" | ||
208 | SUBHALFXREGS(%%d7, %%d6, %%d2) | ||
209 | "mac.w %%d0l, %%a0u, (%[f2])+, %%d0, %%acc0\n" | ||
210 | "mac.w %%d0u, %%a0l, (%[s2])+, %%d7, %%acc0\n" | ||
211 | "move.l %%d2, (%[v1])+ \n" | ||
212 | SUBHALFXREGS(%%a0, %%d7, %%d6) | ||
213 | "mac.w %%d0l, %%a1u, (%[f2])+, %%d0, %%acc0\n" | ||
214 | "mac.w %%d0u, %%a1l, (%[s2])+, %%d1, %%acc0\n" | ||
215 | "move.l %%d6, (%[v1])+ \n" | ||
216 | SUBHALFXREGS(%%a1, %%d1, %%d7) | ||
217 | "move.l %%d7, (%[v1])+ \n" | ||
218 | ) | ||
219 | |||
220 | #if ORDER > 16 | ||
221 | "subq.l #1, %[res] \n" /* the loop counter lives in the res register (see inputs) */ | ||
222 | "bne.w 1b \n" | ||
223 | #endif | ||
224 | |||
225 | "jra 99f \n" | ||
226 | |||
227 | "20: \n" /* path for 32 bit aligned f2 */ | ||
228 | "move.l (%[f2])+, %%d0 \n" | ||
229 | "1: \n" | ||
230 | "movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1 \n" | ||
231 | "mac.w %%d0u, %%d6u, (%[s2])+, %%d1, %%acc0\n" | ||
232 | "mac.w %%d0l, %%d6l, (%[f2])+, %%d0, %%acc0\n" | ||
233 | SUBHALFREGS(%%d6, %%d1, %%d2) | ||
234 | "mac.w %%d0u, %%d7u, (%[s2])+, %%d1, %%acc0\n" | ||
235 | "mac.w %%d0l, %%d7l, (%[f2])+, %%d0, %%acc0\n" | ||
236 | "move.l %%d2, (%[v1])+ \n" | ||
237 | SUBHALFREGS(%%d7, %%d1, %%d2) | ||
238 | "mac.w %%d0u, %%a0u, (%[s2])+, %%d1, %%acc0\n" | ||
239 | "mac.w %%d0l, %%a0l, (%[f2])+, %%d0, %%acc0\n" | ||
240 | "move.l %%d2, (%[v1])+ \n" | ||
241 | SUBHALFREGS(%%a0, %%d1, %%d2) | ||
242 | "mac.w %%d0u, %%a1u, (%[s2])+, %%d1, %%acc0\n" | ||
243 | "mac.w %%d0l, %%a1l, (%[f2])+, %%d0, %%acc0\n" | ||
244 | "move.l %%d2, (%[v1])+ \n" | ||
245 | SUBHALFREGS(%%a1, %%d1, %%d2) | ||
246 | "move.l %%d2, (%[v1])+ \n" | ||
247 | |||
248 | "movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1 \n" | ||
249 | "mac.w %%d0u, %%d6u, (%[s2])+, %%d1, %%acc0\n" | ||
250 | "mac.w %%d0l, %%d6l, (%[f2])+, %%d0, %%acc0\n" | ||
251 | SUBHALFREGS(%%d6, %%d1, %%d2) | ||
252 | "mac.w %%d0u, %%d7u, (%[s2])+, %%d1, %%acc0\n" | ||
253 | "mac.w %%d0l, %%d7l, (%[f2])+, %%d0, %%acc0\n" | ||
254 | "move.l %%d2, (%[v1])+ \n" | ||
255 | SUBHALFREGS(%%d7, %%d1, %%d2) | ||
256 | "mac.w %%d0u, %%a0u, (%[s2])+, %%d1, %%acc0\n" | ||
257 | "mac.w %%d0l, %%a0l, (%[f2])+, %%d0, %%acc0\n" | ||
258 | "move.l %%d2, (%[v1])+ \n" | ||
259 | SUBHALFREGS(%%a0, %%d1, %%d2) | ||
260 | "mac.w %%d0u, %%a1u, (%[s2])+, %%d1, %%acc0\n" | ||
261 | #if ORDER > 16 | ||
262 | "mac.w %%d0l, %%a1l, (%[f2])+, %%d0, %%acc0\n" | ||
263 | #else | ||
264 | "mac.w %%d0l, %%a1l, %%acc0 \n" /* no loop follows, so no further f2 fetch is issued */ | ||
265 | #endif | ||
266 | "move.l %%d2, (%[v1])+ \n" | ||
267 | SUBHALFREGS(%%a1, %%d1, %%d2) | ||
268 | "move.l %%d2, (%[v1])+ \n" | ||
269 | #if ORDER > 16 | ||
270 | "subq.l #1, %[res] \n" /* the loop counter lives in the res register (see inputs) */ | ||
271 | "bne.w 1b \n" | ||
272 | #endif | ||
273 | |||
274 | "99: \n" | ||
275 | "movclr.l %%acc0, %[res] \n" /* read the accumulated sum into res and clear acc0 */ | ||
276 | : /* outputs */ | ||
277 | [v1]"+a"(v1), | ||
278 | [f2]"+a"(f2), | ||
279 | [s2]"+a"(s2), | ||
280 | [res]"=d"(res) | ||
281 | : /* inputs */ | ||
282 | #if ORDER > 16 | ||
283 | [cnt]"[res]"(cnt) /* matching constraint: cnt is passed in the register used for res */ | ||
284 | #endif | ||
285 | : /* clobbers */ | ||
286 | "d0", "d1", "d2", "d6", "d7", | ||
287 | "a0", "a1", "memory" | ||
288 | |||
289 | ); | ||
290 | return res; | ||
291 | } | ||
292 | |||
293 | /* This version fetches data as 32 bit words, and *recommends* v1 to be | ||
294 | * 32 bit aligned, otherwise performance will suffer. It also needs EMAC | ||
295 | * in signed integer mode. Returns the sum of v1[i]*v2[i]; 16 products per pass. */ | ||
296 | static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) | ||
297 | { | ||
298 | int res; | ||
299 | #if ORDER > 16 | ||
300 | int cnt = ORDER>>4; /* loop passes; each pass below accumulates 16 products */ | ||
301 | #endif | ||
302 | |||
303 | asm volatile ( | ||
304 | "move.l %[v2], %%d0 \n" /* test bit 1 of the v2 address */ | ||
305 | "and.l #2, %%d0 \n" | ||
306 | "jeq 20f \n" /* bit clear: v2 is 32 bit aligned, take path 20 */ | ||
307 | |||
308 | "10: \n" /* path for v2 not 32 bit aligned */ | ||
309 | "move.l (%[v1])+, %%d0 \n" | ||
310 | "move.w (%[v2])+, %%d1 \n" /* leading halfword, so later v2 fetches are 32 bit aligned */ | ||
311 | "1: \n" | ||
312 | REPEAT_7( | ||
313 | "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n" | ||
314 | "mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n" | ||
315 | ) | ||
316 | |||
317 | "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n" | ||
318 | #if ORDER > 16 | ||
319 | "mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n" | ||
320 | "subq.l #1, %[res] \n" /* the loop counter lives in the res register (see inputs) */ | ||
321 | "bne.b 1b \n" | ||
322 | #else | ||
323 | "mac.w %%d0l, %%d1u, %%acc0 \n" /* no loop follows, so no further fetch is issued */ | ||
324 | #endif | ||
325 | "jra 99f \n" | ||
326 | |||
327 | "20: \n" /* path for 32 bit aligned v2 */ | ||
328 | "move.l (%[v1])+, %%d0 \n" | ||
329 | "move.l (%[v2])+, %%d1 \n" | ||
330 | "1: \n" | ||
331 | REPEAT_3( | ||
332 | "mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n" | ||
333 | "mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n" | ||
334 | "mac.w %%d2u, %%d1u, (%[v1])+, %%d0, %%acc0\n" | ||
335 | "mac.w %%d2l, %%d1l, (%[v2])+, %%d1, %%acc0\n" | ||
336 | ) | ||
337 | |||
338 | "mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n" | ||
339 | "mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n" | ||
340 | #if ORDER > 16 | ||
341 | "mac.w %%d2u, %%d1u, (%[v1])+, %%d0, %%acc0\n" | ||
342 | "mac.w %%d2l, %%d1l, (%[v2])+, %%d1, %%acc0\n" | ||
343 | "subq.l #1, %[res] \n" /* the loop counter lives in the res register (see inputs) */ | ||
344 | "bne.b 1b \n" | ||
345 | #else | ||
346 | "mac.w %%d2u, %%d1u, %%acc0 \n" /* no loop follows, so no further fetches are issued */ | ||
347 | "mac.w %%d2l, %%d1l, %%acc0 \n" | ||
348 | #endif | ||
349 | |||
350 | "99: \n" | ||
351 | "movclr.l %%acc0, %[res] \n" /* read the accumulated sum into res and clear acc0 */ | ||
352 | : /* outputs */ | ||
353 | [v1]"+a"(v1), | ||
354 | [v2]"+a"(v2), | ||
355 | [res]"=d"(res) | ||
356 | : /* inputs */ | ||
357 | #if ORDER > 16 | ||
358 | [cnt]"[res]"(cnt) /* matching constraint: cnt is passed in the register used for res */ | ||
359 | #endif | ||
360 | : /* clobbers */ | ||
361 | "d0", "d1", "d2", "memory" /* "memory": the asm reads v1[] and v2[] through bare pointers */ | ||
362 | ); | ||
363 | return res; | ||
364 | } | ||
diff --git a/lib/rbcodec/codecs/demac/libdemac/vector_math16_mmx.h b/lib/rbcodec/codecs/demac/libdemac/vector_math16_mmx.h new file mode 100644 index 0000000000..2177fe88ea --- /dev/null +++ b/lib/rbcodec/codecs/demac/libdemac/vector_math16_mmx.h | |||
@@ -0,0 +1,234 @@ | |||
1 | /* | ||
2 | |||
3 | libdemac - A Monkey's Audio decoder | ||
4 | |||
5 | $Id$ | ||
6 | |||
7 | Copyright (C) Dave Chapman 2007 | ||
8 | |||
9 | MMX vector math copyright (C) 2010 Jens Arnold | ||
10 | |||
11 | This program is free software; you can redistribute it and/or modify | ||
12 | it under the terms of the GNU General Public License as published by | ||
13 | the Free Software Foundation; either version 2 of the License, or | ||
14 | (at your option) any later version. | ||
15 | |||
16 | This program is distributed in the hope that it will be useful, | ||
17 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
19 | GNU General Public License for more details. | ||
20 | |||
21 | You should have received a copy of the GNU General Public License | ||
22 | along with this program; if not, write to the Free Software | ||
23 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | ||
24 | |||
25 | */ | ||
26 | |||
27 | #define FUSED_VECTOR_MATH /* this header provides the fused vector_sp_add()/vector_sp_sub() below */ | ||
28 | |||
29 | #define REPEAT_MB3(x, n) x(n) x(n+8) x(n+16) /* invoke x at 3 consecutive 8 byte offsets starting at n */ | ||
30 | #define REPEAT_MB7(x, n) x(n) x(n+8) x(n+16) x(n+24) x(n+32) x(n+40) x(n+48) /* same, 7 offsets */ | ||
31 | #define REPEAT_MB8(x, n) REPEAT_MB7(x, n) x(n+56) /* same, 8 offsets */ | ||
32 | |||
33 | #if ORDER == 16 /* 3 times */ | ||
34 | #define REPEAT_MB(x) REPEAT_MB3(x, 8) /* offsets 8..24; the functions handle offset 0 explicitly when ORDER <= 256 */ | ||
35 | #elif ORDER == 32 /* 7 times */ | ||
36 | #define REPEAT_MB(x) REPEAT_MB7(x, 8) | ||
37 | #elif ORDER == 64 /* 5*3 == 15 times */ | ||
38 | #define REPEAT_MB(x) REPEAT_MB3(x, 8) REPEAT_MB3(x, 32) REPEAT_MB3(x, 56) \ | ||
39 | REPEAT_MB3(x, 80) REPEAT_MB3(x, 104) | ||
40 | #elif ORDER == 256 /* 9*7 == 63 times */ | ||
41 | #define REPEAT_MB(x) REPEAT_MB7(x, 8) REPEAT_MB7(x, 64) REPEAT_MB7(x, 120) \ | ||
42 | REPEAT_MB7(x, 176) REPEAT_MB7(x, 232) REPEAT_MB7(x, 288) \ | ||
43 | REPEAT_MB7(x, 344) REPEAT_MB7(x, 400) REPEAT_MB7(x, 456) | ||
44 | #elif ORDER == 1280 /* 8*8 == 64 times */ | ||
45 | #define REPEAT_MB(x) REPEAT_MB8(x, 0) REPEAT_MB8(x, 64) REPEAT_MB8(x, 128) \ | ||
46 | REPEAT_MB8(x, 192) REPEAT_MB8(x, 256) REPEAT_MB8(x, 320) \ | ||
47 | REPEAT_MB8(x, 384) REPEAT_MB8(x, 448) | ||
48 | #else | ||
49 | #error unsupported order | ||
50 | #endif | ||
51 | |||
52 | /* Calculate scalarproduct, then add a 2nd vector (fused): returns the sum of v1[i]*f2[i] using the incoming v1 values, and stores v1[i] + s2[i] back into v1. Works on 4 int16 values (8 bytes) per step. NOTE(review): no emms is issued here - presumably handled by the caller; confirm. */ | ||
53 | static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t *s2) | ||
54 | { | ||
55 | int res, t; | ||
56 | #if ORDER > 256 | ||
57 | int cnt = ORDER>>8; /* loop passes; each pass handles 512 bytes == 256 elements */ | ||
58 | #endif | ||
59 | |||
60 | asm volatile ( | ||
61 | #if ORDER > 256 | ||
62 | "pxor %%mm2, %%mm2 \n" /* clear the 2 x 32 bit accumulator */ | ||
63 | "1: \n" | ||
64 | #else | ||
65 | "movq (%[v1]), %%mm2 \n" /* first step initialises the accumulator mm2 */ | ||
66 | "movq %%mm2, %%mm0 \n" /* keep a copy of the old v1 values for the add */ | ||
67 | "pmaddwd (%[f2]), %%mm2 \n" | ||
68 | "paddw (%[s2]), %%mm0 \n" | ||
69 | "movq %%mm0, (%[v1]) \n" /* store v1 + s2 */ | ||
70 | #endif | ||
71 | |||
72 | #define SP_ADD_BLOCK(n) \ | ||
73 | "movq " #n "(%[v1]), %%mm1 \n" \ | ||
74 | "movq %%mm1, %%mm0 \n" \ | ||
75 | "pmaddwd " #n "(%[f2]), %%mm1 \n" \ | ||
76 | "paddw " #n "(%[s2]), %%mm0 \n" \ | ||
77 | "movq %%mm0, " #n "(%[v1]) \n" \ | ||
78 | "paddd %%mm1, %%mm2 \n" | ||
79 | |||
80 | REPEAT_MB(SP_ADD_BLOCK) | ||
81 | |||
82 | #if ORDER > 256 | ||
83 | "add $512, %[v1] \n" /* advance all three pointers by one pass */ | ||
84 | "add $512, %[s2] \n" | ||
85 | "add $512, %[f2] \n" | ||
86 | "dec %[cnt] \n" | ||
87 | "jne 1b \n" | ||
88 | #endif | ||
89 | |||
90 | "movd %%mm2, %[t] \n" /* low partial sum */ | ||
91 | "psrlq $32, %%mm2 \n" | ||
92 | "movd %%mm2, %[res] \n" /* high partial sum */ | ||
93 | "add %[t], %[res] \n" /* res = low + high */ | ||
94 | : /* outputs */ | ||
95 | #if ORDER > 256 | ||
96 | [cnt]"+r"(cnt), | ||
97 | [s2] "+r"(s2), | ||
98 | [res]"=r"(res), | ||
99 | [t] "=r"(t) | ||
100 | : /* inputs */ | ||
101 | [v1]"2"(v1), /* matching constraint: v1 shares the register of operand 2 (res) */ | ||
102 | [f2]"3"(f2) /* matching constraint: f2 shares the register of operand 3 (t) */ | ||
103 | #else | ||
104 | [res]"=r"(res), | ||
105 | [t] "=r"(t) | ||
106 | : /* inputs */ | ||
107 | [v1]"r"(v1), | ||
108 | [f2]"r"(f2), | ||
109 | [s2]"r"(s2) | ||
110 | #endif | ||
111 | : /* clobbers */ | ||
112 | "mm0", "mm1", "mm2", "memory" /* "memory": the asm stores into v1[] and reads f2[]/s2[] */ | ||
113 | ); | ||
114 | return res; | ||
115 | } | ||
116 | /* Calculate scalarproduct, then subtract a 2nd vector (fused): returns the sum of v1[i]*f2[i] using the incoming v1 values, and stores v1[i] - s2[i] back into v1. Works on 4 int16 values (8 bytes) per step. NOTE(review): no emms is issued here - presumably handled by the caller; confirm. */ | ||
117 | static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t *s2) | ||
118 | { | ||
119 | int res, t; | ||
120 | #if ORDER > 256 | ||
121 | int cnt = ORDER>>8; /* loop passes; each pass handles 512 bytes == 256 elements */ | ||
122 | #endif | ||
123 | |||
124 | asm volatile ( | ||
125 | #if ORDER > 256 | ||
126 | "pxor %%mm2, %%mm2 \n" /* clear the 2 x 32 bit accumulator */ | ||
127 | "1: \n" | ||
128 | #else | ||
129 | "movq (%[v1]), %%mm2 \n" /* first step initialises the accumulator mm2 */ | ||
130 | "movq %%mm2, %%mm0 \n" /* keep a copy of the old v1 values for the subtract */ | ||
131 | "pmaddwd (%[f2]), %%mm2 \n" | ||
132 | "psubw (%[s2]), %%mm0 \n" | ||
133 | "movq %%mm0, (%[v1]) \n" /* store v1 - s2 */ | ||
134 | #endif | ||
135 | |||
136 | #define SP_SUB_BLOCK(n) \ | ||
137 | "movq " #n "(%[v1]), %%mm1 \n" \ | ||
138 | "movq %%mm1, %%mm0 \n" \ | ||
139 | "pmaddwd " #n "(%[f2]), %%mm1 \n" \ | ||
140 | "psubw " #n "(%[s2]), %%mm0 \n" \ | ||
141 | "movq %%mm0, " #n "(%[v1]) \n" \ | ||
142 | "paddd %%mm1, %%mm2 \n" | ||
143 | |||
144 | REPEAT_MB(SP_SUB_BLOCK) | ||
145 | |||
146 | #if ORDER > 256 | ||
147 | "add $512, %[v1] \n" /* advance all three pointers by one pass */ | ||
148 | "add $512, %[s2] \n" | ||
149 | "add $512, %[f2] \n" | ||
150 | "dec %[cnt] \n" | ||
151 | "jne 1b \n" | ||
152 | #endif | ||
153 | |||
154 | "movd %%mm2, %[t] \n" /* low partial sum */ | ||
155 | "psrlq $32, %%mm2 \n" | ||
156 | "movd %%mm2, %[res] \n" /* high partial sum */ | ||
157 | "add %[t], %[res] \n" /* res = low + high */ | ||
158 | : /* outputs */ | ||
159 | #if ORDER > 256 | ||
160 | [cnt]"+r"(cnt), | ||
161 | [s2] "+r"(s2), | ||
162 | [res]"=r"(res), | ||
163 | [t] "=r"(t) | ||
164 | : /* inputs */ | ||
165 | [v1]"2"(v1), /* matching constraint: v1 shares the register of operand 2 (res) */ | ||
166 | [f2]"3"(f2) /* matching constraint: f2 shares the register of operand 3 (t) */ | ||
167 | #else | ||
168 | [res]"=r"(res), | ||
169 | [t] "=r"(t) | ||
170 | : /* inputs */ | ||
171 | [v1]"r"(v1), | ||
172 | [f2]"r"(f2), | ||
173 | [s2]"r"(s2) | ||
174 | #endif | ||
175 | : /* clobbers */ | ||
176 | "mm0", "mm1", "mm2", "memory" /* "memory": the asm stores into v1[] and reads f2[]/s2[] */ | ||
177 | ); | ||
178 | return res; | ||
179 | } | ||
180 | /* Return the sum of v1[i]*v2[i], working on 4 int16 values (8 bytes) per step. Neither vector is modified. NOTE(review): no emms is issued here - presumably handled by the caller; confirm. */ | ||
181 | static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) | ||
182 | { | ||
183 | int res, t; | ||
184 | #if ORDER > 256 | ||
185 | int cnt = ORDER>>8; /* loop passes; each pass handles 512 bytes == 256 elements */ | ||
186 | #endif | ||
187 | |||
188 | asm volatile ( | ||
189 | #if ORDER > 256 | ||
190 | "pxor %%mm1, %%mm1 \n" /* clear the 2 x 32 bit accumulator */ | ||
191 | "1: \n" | ||
192 | #else | ||
193 | "movq (%[v1]), %%mm1 \n" /* first step initialises the accumulator mm1 */ | ||
194 | "pmaddwd (%[v2]), %%mm1 \n" | ||
195 | #endif | ||
196 | |||
197 | #define SP_BLOCK(n) \ | ||
198 | "movq " #n "(%[v1]), %%mm0 \n" \ | ||
199 | "pmaddwd " #n "(%[v2]), %%mm0 \n" \ | ||
200 | "paddd %%mm0, %%mm1 \n" | ||
201 | |||
202 | REPEAT_MB(SP_BLOCK) | ||
203 | |||
204 | #if ORDER > 256 | ||
205 | "add $512, %[v1] \n" /* advance both pointers by one pass */ | ||
206 | "add $512, %[v2] \n" | ||
207 | "dec %[cnt] \n" | ||
208 | "jne 1b \n" | ||
209 | #endif | ||
210 | |||
211 | "movd %%mm1, %[t] \n" /* low partial sum */ | ||
212 | "psrlq $32, %%mm1 \n" | ||
213 | "movd %%mm1, %[res] \n" /* high partial sum */ | ||
214 | "add %[t], %[res] \n" /* res = low + high */ | ||
215 | : /* outputs */ | ||
216 | #if ORDER > 256 | ||
217 | [cnt]"+r"(cnt), | ||
218 | [res]"=r"(res), | ||
219 | [t] "=r"(t) | ||
220 | : /* inputs */ | ||
221 | [v1]"1"(v1), /* matching constraint: v1 shares the register of operand 1 (res) */ | ||
222 | [v2]"2"(v2) /* matching constraint: v2 shares the register of operand 2 (t) */ | ||
223 | #else | ||
224 | [res]"=r"(res), | ||
225 | [t] "=r"(t) | ||
226 | : /* inputs */ | ||
227 | [v1]"r"(v1), | ||
228 | [v2]"r"(v2) | ||
229 | #endif | ||
230 | : /* clobbers */ | ||
231 | "mm0", "mm1", "memory" /* "memory": the asm reads v1[] and v2[] through bare pointers */ | ||
232 | ); | ||
233 | return res; | ||
234 | } | ||
diff --git a/lib/rbcodec/codecs/demac/libdemac/vector_math32_armv4.h b/lib/rbcodec/codecs/demac/libdemac/vector_math32_armv4.h new file mode 100644 index 0000000000..d6bb9b0d9c --- /dev/null +++ b/lib/rbcodec/codecs/demac/libdemac/vector_math32_armv4.h | |||
@@ -0,0 +1,201 @@ | |||
1 | /* | ||
2 | |||
3 | libdemac - A Monkey's Audio decoder | ||
4 | |||
5 | $Id$ | ||
6 | |||
7 | Copyright (C) Dave Chapman 2007 | ||
8 | |||
9 | ARMv4 vector math copyright (C) 2008 Jens Arnold | ||
10 | |||
11 | This program is free software; you can redistribute it and/or modify | ||
12 | it under the terms of the GNU General Public License as published by | ||
13 | the Free Software Foundation; either version 2 of the License, or | ||
14 | (at your option) any later version. | ||
15 | |||
16 | This program is distributed in the hope that it will be useful, | ||
17 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
19 | GNU General Public License for more details. | ||
20 | |||
21 | You should have received a copy of the GNU General Public License | ||
22 | along with this program; if not, write to the Free Software | ||
23 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | ||
24 | |||
25 | */ | ||
26 | |||
27 | #define FUSED_VECTOR_MATH /* this header provides the fused vector_sp_add()/vector_sp_sub() below */ | ||
28 | |||
29 | #if ORDER > 32 | ||
30 | #define REPEAT_BLOCK(x) x x x x x x x x /* 8 copies of a 4 element block: 32 elements per loop pass */ | ||
31 | #elif ORDER > 16 | ||
32 | #define REPEAT_BLOCK(x) x x x x x x x /* 7 copies; with the explicit first block that is 32 elements */ | ||
33 | #else | ||
34 | #define REPEAT_BLOCK(x) x x x /* 3 copies; with the explicit first block that is 16 elements */ | ||
35 | #endif | ||
36 | |||
37 | /* Calculate scalarproduct, then add a 2nd vector (fused for performance): returns the sum of v1[i]*f2[i] using the incoming v1 values, and stores v1[i] + s2[i] back into v1. Works in blocks of 4 elements. */ | ||
38 | static inline int32_t vector_sp_add(int32_t* v1, int32_t* f2, int32_t* s2) | ||
39 | { | ||
40 | int res; | ||
41 | #if ORDER > 32 | ||
42 | int cnt = ORDER>>5; /* loop passes; each pass handles 32 elements */ | ||
43 | #endif | ||
44 | |||
45 | asm volatile ( | ||
46 | #if ORDER > 32 | ||
47 | "mov %[res], #0 \n" /* start the sum at zero */ | ||
48 | "1: \n" | ||
49 | #else | ||
50 | "ldmia %[v1], {r0-r3} \n" /* 4 values from v1, no writeback: stored back below */ | ||
51 | "ldmia %[f2]!, {r4-r7} \n" | ||
52 | "mul %[res], r4, r0 \n" /* first product initialises the sum */ | ||
53 | "mla %[res], r5, r1, %[res] \n" | ||
54 | "mla %[res], r6, r2, %[res] \n" | ||
55 | "mla %[res], r7, r3, %[res] \n" | ||
56 | "ldmia %[s2]!, {r4-r7} \n" /* r4-r7 are reused for the s2 values */ | ||
57 | "add r0, r0, r4 \n" | ||
58 | "add r1, r1, r5 \n" | ||
59 | "add r2, r2, r6 \n" | ||
60 | "add r3, r3, r7 \n" | ||
61 | "stmia %[v1]!, {r0-r3} \n" /* store v1 + s2, pointer advanced */ | ||
62 | #endif | ||
63 | REPEAT_BLOCK( | ||
64 | "ldmia %[v1], {r0-r3} \n" | ||
65 | "ldmia %[f2]!, {r4-r7} \n" | ||
66 | "mla %[res], r4, r0, %[res] \n" | ||
67 | "mla %[res], r5, r1, %[res] \n" | ||
68 | "mla %[res], r6, r2, %[res] \n" | ||
69 | "mla %[res], r7, r3, %[res] \n" | ||
70 | "ldmia %[s2]!, {r4-r7} \n" | ||
71 | "add r0, r0, r4 \n" | ||
72 | "add r1, r1, r5 \n" | ||
73 | "add r2, r2, r6 \n" | ||
74 | "add r3, r3, r7 \n" | ||
75 | "stmia %[v1]!, {r0-r3} \n" | ||
76 | ) | ||
77 | #if ORDER > 32 | ||
78 | "subs %[cnt], %[cnt], #1 \n" | ||
79 | "bne 1b \n" | ||
80 | #endif | ||
81 | : /* outputs */ | ||
82 | #if ORDER > 32 | ||
83 | [cnt]"+r"(cnt), | ||
84 | #endif | ||
85 | [v1] "+r"(v1), | ||
86 | [f2] "+r"(f2), | ||
87 | [s2] "+r"(s2), | ||
88 | [res]"=r"(res) | ||
89 | : /* inputs */ | ||
90 | : /* clobbers */ | ||
91 | "r0", "r1", "r2", "r3", "r4", | ||
92 | "r5", "r6", "r7", "cc", "memory" | ||
93 | ); | ||
94 | return res; | ||
95 | } | ||
96 | |||
97 | /* Calculate scalarproduct, then subtract a 2nd vector (fused for performance): returns the sum of v1[i]*f2[i] using the incoming v1 values, and stores v1[i] - s2[i] back into v1. Works in blocks of 4 elements. */ | ||
98 | static inline int32_t vector_sp_sub(int32_t* v1, int32_t* f2, int32_t* s2) | ||
99 | { | ||
100 | int res; | ||
101 | #if ORDER > 32 | ||
102 | int cnt = ORDER>>5; /* loop passes; each pass handles 32 elements */ | ||
103 | #endif | ||
104 | |||
105 | asm volatile ( | ||
106 | #if ORDER > 32 | ||
107 | "mov %[res], #0 \n" /* start the sum at zero */ | ||
108 | "1: \n" | ||
109 | #else | ||
110 | "ldmia %[v1], {r0-r3} \n" /* 4 values from v1, no writeback: stored back below */ | ||
111 | "ldmia %[f2]!, {r4-r7} \n" | ||
112 | "mul %[res], r4, r0 \n" /* first product initialises the sum */ | ||
113 | "mla %[res], r5, r1, %[res] \n" | ||
114 | "mla %[res], r6, r2, %[res] \n" | ||
115 | "mla %[res], r7, r3, %[res] \n" | ||
116 | "ldmia %[s2]!, {r4-r7} \n" /* r4-r7 are reused for the s2 values */ | ||
117 | "sub r0, r0, r4 \n" | ||
118 | "sub r1, r1, r5 \n" | ||
119 | "sub r2, r2, r6 \n" | ||
120 | "sub r3, r3, r7 \n" | ||
121 | "stmia %[v1]!, {r0-r3} \n" /* store v1 - s2, pointer advanced */ | ||
122 | #endif | ||
123 | REPEAT_BLOCK( | ||
124 | "ldmia %[v1], {r0-r3} \n" | ||
125 | "ldmia %[f2]!, {r4-r7} \n" | ||
126 | "mla %[res], r4, r0, %[res] \n" | ||
127 | "mla %[res], r5, r1, %[res] \n" | ||
128 | "mla %[res], r6, r2, %[res] \n" | ||
129 | "mla %[res], r7, r3, %[res] \n" | ||
130 | "ldmia %[s2]!, {r4-r7} \n" | ||
131 | "sub r0, r0, r4 \n" | ||
132 | "sub r1, r1, r5 \n" | ||
133 | "sub r2, r2, r6 \n" | ||
134 | "sub r3, r3, r7 \n" | ||
135 | "stmia %[v1]!, {r0-r3} \n" | ||
136 | ) | ||
137 | #if ORDER > 32 | ||
138 | "subs %[cnt], %[cnt], #1 \n" | ||
139 | "bne 1b \n" | ||
140 | #endif | ||
141 | : /* outputs */ | ||
142 | #if ORDER > 32 | ||
143 | [cnt]"+r"(cnt), | ||
144 | #endif | ||
145 | [v1] "+r"(v1), | ||
146 | [f2] "+r"(f2), | ||
147 | [s2] "+r"(s2), | ||
148 | [res]"=r"(res) | ||
149 | : /* inputs */ | ||
150 | : /* clobbers */ | ||
151 | "r0", "r1", "r2", "r3", "r4", | ||
152 | "r5", "r6", "r7", "cc", "memory" | ||
153 | ); | ||
154 | return res; | ||
155 | } | ||
156 | |||
157 | static inline int32_t scalarproduct(int32_t* v1, int32_t* v2) /* returns the sum of v1[i]*v2[i], computed in blocks of 4 elements; neither vector is written */ | ||
158 | { | ||
159 | int res; | ||
160 | #if ORDER > 32 | ||
161 | int cnt = ORDER>>5; /* loop passes; each pass handles 32 elements */ | ||
162 | #endif | ||
163 | |||
164 | asm volatile ( | ||
165 | #if ORDER > 32 | ||
166 | "mov %[res], #0 \n" /* start the sum at zero */ | ||
167 | "1: \n" | ||
168 | #else | ||
169 | "ldmia %[v1]!, {r0-r3} \n" | ||
170 | "ldmia %[v2]!, {r4-r7} \n" | ||
171 | "mul %[res], r4, r0 \n" /* first product initialises the sum */ | ||
172 | "mla %[res], r5, r1, %[res] \n" | ||
173 | "mla %[res], r6, r2, %[res] \n" | ||
174 | "mla %[res], r7, r3, %[res] \n" | ||
175 | #endif | ||
176 | REPEAT_BLOCK( | ||
177 | "ldmia %[v1]!, {r0-r3} \n" | ||
178 | "ldmia %[v2]!, {r4-r7} \n" | ||
179 | "mla %[res], r4, r0, %[res] \n" | ||
180 | "mla %[res], r5, r1, %[res] \n" | ||
181 | "mla %[res], r6, r2, %[res] \n" | ||
182 | "mla %[res], r7, r3, %[res] \n" | ||
183 | ) | ||
184 | #if ORDER > 32 | ||
185 | "subs %[cnt], %[cnt], #1 \n" | ||
186 | "bne 1b \n" | ||
187 | #endif | ||
188 | : /* outputs */ | ||
189 | #if ORDER > 32 | ||
190 | [cnt]"+r"(cnt), | ||
191 | #endif | ||
192 | [v1] "+r"(v1), | ||
193 | [v2] "+r"(v2), | ||
194 | [res]"=r"(res) | ||
195 | : /* inputs */ | ||
196 | : /* clobbers */ | ||
197 | "r0", "r1", "r2", "r3", | ||
198 | "r4", "r5", "r6", "r7", "cc", "memory" | ||
199 | ); | ||
200 | return res; | ||
201 | } | ||
diff --git a/lib/rbcodec/codecs/demac/libdemac/vector_math_generic.h b/lib/rbcodec/codecs/demac/libdemac/vector_math_generic.h new file mode 100644 index 0000000000..00bf07a007 --- /dev/null +++ b/lib/rbcodec/codecs/demac/libdemac/vector_math_generic.h | |||
@@ -0,0 +1,160 @@ | |||
1 | /* | ||
2 | |||
3 | libdemac - A Monkey's Audio decoder | ||
4 | |||
5 | $Id$ | ||
6 | |||
7 | Copyright (C) Dave Chapman 2007 | ||
8 | |||
9 | This program is free software; you can redistribute it and/or modify | ||
10 | it under the terms of the GNU General Public License as published by | ||
11 | the Free Software Foundation; either version 2 of the License, or | ||
12 | (at your option) any later version. | ||
13 | |||
14 | This program is distributed in the hope that it will be useful, | ||
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | GNU General Public License for more details. | ||
18 | |||
19 | You should have received a copy of the GNU General Public License | ||
20 | along with this program; if not, write to the Free Software | ||
21 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | ||
22 | |||
23 | */ | ||
24 | |||
25 | #include "demac_config.h" | ||
26 | |||
27 | static inline void vector_add(filter_int* v1, filter_int* v2) | ||
28 | { | ||
29 | #if ORDER > 32 | ||
30 | int order = (ORDER >> 5); | ||
31 | while (order--) | ||
32 | #endif | ||
33 | { | ||
34 | *v1++ += *v2++; | ||
35 | *v1++ += *v2++; | ||
36 | *v1++ += *v2++; | ||
37 | *v1++ += *v2++; | ||
38 | *v1++ += *v2++; | ||
39 | *v1++ += *v2++; | ||
40 | *v1++ += *v2++; | ||
41 | *v1++ += *v2++; | ||
42 | *v1++ += *v2++; | ||
43 | *v1++ += *v2++; | ||
44 | *v1++ += *v2++; | ||
45 | *v1++ += *v2++; | ||
46 | *v1++ += *v2++; | ||
47 | *v1++ += *v2++; | ||
48 | *v1++ += *v2++; | ||
49 | *v1++ += *v2++; | ||
50 | #if ORDER > 16 | ||
51 | *v1++ += *v2++; | ||
52 | *v1++ += *v2++; | ||
53 | *v1++ += *v2++; | ||
54 | *v1++ += *v2++; | ||
55 | *v1++ += *v2++; | ||
56 | *v1++ += *v2++; | ||
57 | *v1++ += *v2++; | ||
58 | *v1++ += *v2++; | ||
59 | *v1++ += *v2++; | ||
60 | *v1++ += *v2++; | ||
61 | *v1++ += *v2++; | ||
62 | *v1++ += *v2++; | ||
63 | *v1++ += *v2++; | ||
64 | *v1++ += *v2++; | ||
65 | *v1++ += *v2++; | ||
66 | *v1++ += *v2++; | ||
67 | #endif | ||
68 | } | ||
69 | } | ||
70 | |||
71 | static inline void vector_sub(filter_int* v1, filter_int* v2) | ||
72 | { | ||
73 | #if ORDER > 32 | ||
74 | int order = (ORDER >> 5); | ||
75 | while (order--) | ||
76 | #endif | ||
77 | { | ||
78 | *v1++ -= *v2++; | ||
79 | *v1++ -= *v2++; | ||
80 | *v1++ -= *v2++; | ||
81 | *v1++ -= *v2++; | ||
82 | *v1++ -= *v2++; | ||
83 | *v1++ -= *v2++; | ||
84 | *v1++ -= *v2++; | ||
85 | *v1++ -= *v2++; | ||
86 | *v1++ -= *v2++; | ||
87 | *v1++ -= *v2++; | ||
88 | *v1++ -= *v2++; | ||
89 | *v1++ -= *v2++; | ||
90 | *v1++ -= *v2++; | ||
91 | *v1++ -= *v2++; | ||
92 | *v1++ -= *v2++; | ||
93 | *v1++ -= *v2++; | ||
94 | #if ORDER > 16 | ||
95 | *v1++ -= *v2++; | ||
96 | *v1++ -= *v2++; | ||
97 | *v1++ -= *v2++; | ||
98 | *v1++ -= *v2++; | ||
99 | *v1++ -= *v2++; | ||
100 | *v1++ -= *v2++; | ||
101 | *v1++ -= *v2++; | ||
102 | *v1++ -= *v2++; | ||
103 | *v1++ -= *v2++; | ||
104 | *v1++ -= *v2++; | ||
105 | *v1++ -= *v2++; | ||
106 | *v1++ -= *v2++; | ||
107 | *v1++ -= *v2++; | ||
108 | *v1++ -= *v2++; | ||
109 | *v1++ -= *v2++; | ||
110 | *v1++ -= *v2++; | ||
111 | #endif | ||
112 | } | ||
113 | } | ||
114 | |||
115 | static inline int32_t scalarproduct(filter_int* v1, filter_int* v2) | ||
116 | { | ||
117 | int res = 0; | ||
118 | |||
119 | #if ORDER > 32 | ||
120 | int order = (ORDER >> 5); | ||
121 | while (order--) | ||
122 | #endif | ||
123 | { | ||
124 | res += *v1++ * *v2++; | ||
125 | res += *v1++ * *v2++; | ||
126 | res += *v1++ * *v2++; | ||
127 | res += *v1++ * *v2++; | ||
128 | res += *v1++ * *v2++; | ||
129 | res += *v1++ * *v2++; | ||
130 | res += *v1++ * *v2++; | ||
131 | res += *v1++ * *v2++; | ||
132 | res += *v1++ * *v2++; | ||
133 | res += *v1++ * *v2++; | ||
134 | res += *v1++ * *v2++; | ||
135 | res += *v1++ * *v2++; | ||
136 | res += *v1++ * *v2++; | ||
137 | res += *v1++ * *v2++; | ||
138 | res += *v1++ * *v2++; | ||
139 | res += *v1++ * *v2++; | ||
140 | #if ORDER > 16 | ||
141 | res += *v1++ * *v2++; | ||
142 | res += *v1++ * *v2++; | ||
143 | res += *v1++ * *v2++; | ||
144 | res += *v1++ * *v2++; | ||
145 | res += *v1++ * *v2++; | ||
146 | res += *v1++ * *v2++; | ||
147 | res += *v1++ * *v2++; | ||
148 | res += *v1++ * *v2++; | ||
149 | res += *v1++ * *v2++; | ||
150 | res += *v1++ * *v2++; | ||
151 | res += *v1++ * *v2++; | ||
152 | res += *v1++ * *v2++; | ||
153 | res += *v1++ * *v2++; | ||
154 | res += *v1++ * *v2++; | ||
155 | res += *v1++ * *v2++; | ||
156 | res += *v1++ * *v2++; | ||
157 | #endif | ||
158 | } | ||
159 | return res; | ||
160 | } | ||