summaryrefslogtreecommitdiff
path: root/lib/rbcodec/codecs/demac/libdemac
diff options
context:
space:
mode:
Diffstat (limited to 'lib/rbcodec/codecs/demac/libdemac')
-rw-r--r--lib/rbcodec/codecs/demac/libdemac/SOURCES15
-rw-r--r--lib/rbcodec/codecs/demac/libdemac/crc.c120
-rw-r--r--lib/rbcodec/codecs/demac/libdemac/decoder.c216
-rw-r--r--lib/rbcodec/codecs/demac/libdemac/decoder.h40
-rw-r--r--lib/rbcodec/codecs/demac/libdemac/demac.h45
-rw-r--r--lib/rbcodec/codecs/demac/libdemac/demac_config.h145
-rw-r--r--lib/rbcodec/codecs/demac/libdemac/entropy.c464
-rw-r--r--lib/rbcodec/codecs/demac/libdemac/entropy.h40
-rw-r--r--lib/rbcodec/codecs/demac/libdemac/filter.c296
-rw-r--r--lib/rbcodec/codecs/demac/libdemac/filter.h50
-rw-r--r--lib/rbcodec/codecs/demac/libdemac/filter_1280_15.c32
-rw-r--r--lib/rbcodec/codecs/demac/libdemac/filter_16_11.c27
-rw-r--r--lib/rbcodec/codecs/demac/libdemac/filter_256_13.c32
-rw-r--r--lib/rbcodec/codecs/demac/libdemac/filter_32_10.c27
-rw-r--r--lib/rbcodec/codecs/demac/libdemac/filter_64_11.c27
-rw-r--r--lib/rbcodec/codecs/demac/libdemac/parser.c402
-rw-r--r--lib/rbcodec/codecs/demac/libdemac/parser.h137
-rw-r--r--lib/rbcodec/codecs/demac/libdemac/predictor-arm.S702
-rw-r--r--lib/rbcodec/codecs/demac/libdemac/predictor-cf.S660
-rw-r--r--lib/rbcodec/codecs/demac/libdemac/predictor.c271
-rw-r--r--lib/rbcodec/codecs/demac/libdemac/predictor.h38
-rw-r--r--lib/rbcodec/codecs/demac/libdemac/udiv32_arm-pre.S25
-rw-r--r--lib/rbcodec/codecs/demac/libdemac/udiv32_arm.S318
-rw-r--r--lib/rbcodec/codecs/demac/libdemac/vector_math16_armv5te.h404
-rw-r--r--lib/rbcodec/codecs/demac/libdemac/vector_math16_armv6.h490
-rw-r--r--lib/rbcodec/codecs/demac/libdemac/vector_math16_armv7.h214
-rw-r--r--lib/rbcodec/codecs/demac/libdemac/vector_math16_cf.h364
-rw-r--r--lib/rbcodec/codecs/demac/libdemac/vector_math16_mmx.h234
-rw-r--r--lib/rbcodec/codecs/demac/libdemac/vector_math32_armv4.h201
-rw-r--r--lib/rbcodec/codecs/demac/libdemac/vector_math_generic.h160
30 files changed, 6196 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/demac/libdemac/SOURCES b/lib/rbcodec/codecs/demac/libdemac/SOURCES
new file mode 100644
index 0000000000..018f35a73c
--- /dev/null
+++ b/lib/rbcodec/codecs/demac/libdemac/SOURCES
@@ -0,0 +1,15 @@
1predictor.c
2#ifdef CPU_ARM
3predictor-arm.S
4udiv32_arm.S
5#elif defined CPU_COLDFIRE
6predictor-cf.S
7#endif
8entropy.c
9decoder.c
10parser.c
11filter_1280_15.c
12filter_16_11.c
13filter_256_13.c
14filter_32_10.c
15filter_64_11.c
diff --git a/lib/rbcodec/codecs/demac/libdemac/crc.c b/lib/rbcodec/codecs/demac/libdemac/crc.c
new file mode 100644
index 0000000000..fa3ea89d7e
--- /dev/null
+++ b/lib/rbcodec/codecs/demac/libdemac/crc.c
@@ -0,0 +1,120 @@
1/*
2
3libdemac - A Monkey's Audio decoder
4
5$Id$
6
7Copyright (C) Dave Chapman 2007
8
9This program is free software; you can redistribute it and/or modify
10it under the terms of the GNU General Public License as published by
11the Free Software Foundation; either version 2 of the License, or
12(at your option) any later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
22
23*/
24
25#include <inttypes.h>
26#include "demac.h"
27
/* Byte-indexed lookup table (256 entries) consumed by ape_updatecrc().
 * The values are those of the widely used table for the reflected CRC-32
 * polynomial 0xEDB88320, which appears here as entry 128. */
28static const uint32_t crctab32[] =
29{
30 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA,
31 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3,
32 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988,
33 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91,
34 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE,
35 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7,
36 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC,
37 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5,
38 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172,
39 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B,
40 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940,
41 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59,
42 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116,
43 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F,
44 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924,
45 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D,
46
47 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A,
48 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433,
49 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818,
50 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01,
51 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E,
52 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457,
53 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C,
54 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65,
55 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2,
56 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB,
57 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0,
58 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9,
59 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086,
60 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F,
61 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4,
62 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD,
63
64 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A,
65 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683,
66 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8,
67 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1,
68 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE,
69 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7,
70 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC,
71 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5,
72 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252,
73 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B,
74 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60,
75 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79,
76 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236,
77 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F,
78 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04,
79 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D,
80
81 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A,
82 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713,
83 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38,
84 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21,
85 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E,
86 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777,
87 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C,
88 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45,
89 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2,
90 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB,
91 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0,
92 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9,
93 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6,
94 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF,
95 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94,
96 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D
97};
98
/* Return the starting value (all 32 bits set) for a new CRC calculation. */
uint32_t ape_initcrc(void)
{
    const uint32_t seed = 0xffffffff;

    return seed;
}
103
104/* Update the CRC from a block of WAV-format audio data */
105uint32_t ape_updatecrc(unsigned char *block, int count, uint32_t crc)
106{
107 while (count--)
108 crc = (crc >> 8) ^ crctab32[(crc & 0xff) ^ *block++];
109
110 return crc;
111}
112
/* Finalise a running CRC: invert every bit, then drop the lowest bit so the
 * result occupies 31 bits.
 * NOTE(review): presumably 31 bits because init_entropy_decoder() treats the
 * top bit of the stored frame CRC as a flag - confirm against the caller. */
uint32_t ape_finishcrc(uint32_t crc)
{
    return (crc ^ 0xffffffff) >> 1;
}
120
diff --git a/lib/rbcodec/codecs/demac/libdemac/decoder.c b/lib/rbcodec/codecs/demac/libdemac/decoder.c
new file mode 100644
index 0000000000..b0339a75d9
--- /dev/null
+++ b/lib/rbcodec/codecs/demac/libdemac/decoder.c
@@ -0,0 +1,216 @@
1/*
2
3libdemac - A Monkey's Audio decoder
4
5$Id$
6
7Copyright (C) Dave Chapman 2007
8
9This program is free software; you can redistribute it and/or modify
10it under the terms of the GNU General Public License as published by
11the Free Software Foundation; either version 2 of the License, or
12(at your option) any later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
22
23*/
24
25#include <inttypes.h>
26#include <string.h>
27
28#include "demac.h"
29#include "predictor.h"
30#include "entropy.h"
31#include "filter.h"
32#include "demac_config.h"
33
34/* Statically allocate the filter buffers */
35
36#ifdef FILTER256_IRAM
/* Byte sizes quoted below are for 16-bit and 32-bit filter_int respectively,
   with the default FILTER_HISTORY_SIZE of 512. */
37static filter_int filterbuf32[(32*3 + FILTER_HISTORY_SIZE) * 2]
38 IBSS_ATTR_DEMAC MEM_ALIGN_ATTR;
39 /* 2432 or 4864 bytes */
40static filter_int filterbuf256[(256*3 + FILTER_HISTORY_SIZE) * 2]
41 IBSS_ATTR_DEMAC MEM_ALIGN_ATTR;
42 /* 5120 or 10240 bytes */
/* The 64-tap filter shares the 256-tap buffer; the 16- and 32-tap filters
   share the 32-tap buffer. */
43#define FILTERBUF64 filterbuf256
44#define FILTERBUF32 filterbuf32
45#define FILTERBUF16 filterbuf32
46#else
/* Byte sizes quoted below are for 16-bit and 32-bit filter_int respectively,
   with the default FILTER_HISTORY_SIZE of 512. */
47static filter_int filterbuf64[(64*3 + FILTER_HISTORY_SIZE) * 2]
48 IBSS_ATTR_DEMAC MEM_ALIGN_ATTR;
49 /* 2816 or 5632 bytes */
50static filter_int filterbuf256[(256*3 + FILTER_HISTORY_SIZE) * 2]
51 MEM_ALIGN_ATTR; /* 5120 or 10240 bytes */
/* The 16-, 32- and 64-tap filters all share the 64-tap buffer. */
52#define FILTERBUF64 filterbuf64
53#define FILTERBUF32 filterbuf64
54#define FILTERBUF16 filterbuf64
55#endif
56
57/* This is only needed for "insane" files, and no current Rockbox targets
58 can hope to decode them in realtime, except the Gigabeat S (at 528MHz). */
59static filter_int filterbuf1280[(1280*3 + FILTER_HISTORY_SIZE) * 2]
60 IBSS_ATTR_DEMAC_INSANEBUF MEM_ALIGN_ATTR;
61 /* 17408 or 34816 bytes */
62
63void init_frame_decoder(struct ape_ctx_t* ape_ctx,
64 unsigned char* inbuffer, int* firstbyte,
65 int* bytesconsumed)
66{
67 init_entropy_decoder(ape_ctx, inbuffer, firstbyte, bytesconsumed);
68 //printf("CRC=0x%08x\n",ape_ctx->CRC);
69 //printf("Flags=0x%08x\n",ape_ctx->frameflags);
70
71 init_predictor_decoder(&ape_ctx->predictor);
72
73 switch (ape_ctx->compressiontype)
74 {
75 case 2000:
76 init_filter_16_11(FILTERBUF16);
77 break;
78
79 case 3000:
80 init_filter_64_11(FILTERBUF64);
81 break;
82
83 case 4000:
84 init_filter_256_13(filterbuf256);
85 init_filter_32_10(FILTERBUF32);
86 break;
87
88 case 5000:
89 init_filter_1280_15(filterbuf1280);
90 init_filter_256_13(filterbuf256);
91 init_filter_16_11(FILTERBUF32);
92 }
93}
94
95int ICODE_ATTR_DEMAC decode_chunk(struct ape_ctx_t* ape_ctx,
96 unsigned char* inbuffer, int* firstbyte,
97 int* bytesconsumed,
98 int32_t* decoded0, int32_t* decoded1,
99 int count)
100{
101 int32_t left, right;
102#ifdef ROCKBOX
103 int scale = (APE_OUTPUT_DEPTH - ape_ctx->bps);
104 #define SCALE(x) ((x) << scale)
105#else
106 #define SCALE(x) (x)
107#endif
108
109 if ((ape_ctx->channels==1) || ((ape_ctx->frameflags
110 & (APE_FRAMECODE_PSEUDO_STEREO|APE_FRAMECODE_STEREO_SILENCE))
111 == APE_FRAMECODE_PSEUDO_STEREO)) {
112
113 entropy_decode(ape_ctx, inbuffer, firstbyte, bytesconsumed,
114 decoded0, NULL, count);
115
116 if (ape_ctx->frameflags & APE_FRAMECODE_MONO_SILENCE) {
117 /* We are pure silence, so we're done. */
118 return 0;
119 }
120
121 switch (ape_ctx->compressiontype)
122 {
123 case 2000:
124 apply_filter_16_11(ape_ctx->fileversion,0,decoded0,count);
125 break;
126
127 case 3000:
128 apply_filter_64_11(ape_ctx->fileversion,0,decoded0,count);
129 break;
130
131 case 4000:
132 apply_filter_32_10(ape_ctx->fileversion,0,decoded0,count);
133 apply_filter_256_13(ape_ctx->fileversion,0,decoded0,count);
134 break;
135
136 case 5000:
137 apply_filter_16_11(ape_ctx->fileversion,0,decoded0,count);
138 apply_filter_256_13(ape_ctx->fileversion,0,decoded0,count);
139 apply_filter_1280_15(ape_ctx->fileversion,0,decoded0,count);
140 }
141
142 /* Now apply the predictor decoding */
143 predictor_decode_mono(&ape_ctx->predictor,decoded0,count);
144
145 if (ape_ctx->channels==2) {
146 /* Pseudo-stereo - copy left channel to right channel */
147 while (count--)
148 {
149 left = *decoded0;
150 *(decoded1++) = *(decoded0++) = SCALE(left);
151 }
152 }
153#ifdef ROCKBOX
154 else {
155 /* Scale to output depth */
156 while (count--)
157 {
158 left = *decoded0;
159 *(decoded0++) = SCALE(left);
160 }
161 }
162#endif
163 } else { /* Stereo */
164 entropy_decode(ape_ctx, inbuffer, firstbyte, bytesconsumed,
165 decoded0, decoded1, count);
166
167 if ((ape_ctx->frameflags & APE_FRAMECODE_STEREO_SILENCE)
168 == APE_FRAMECODE_STEREO_SILENCE) {
169 /* We are pure silence, so we're done. */
170 return 0;
171 }
172
173 /* Apply filters - compression type 1000 doesn't have any */
174 switch (ape_ctx->compressiontype)
175 {
176 case 2000:
177 apply_filter_16_11(ape_ctx->fileversion,0,decoded0,count);
178 apply_filter_16_11(ape_ctx->fileversion,1,decoded1,count);
179 break;
180
181 case 3000:
182 apply_filter_64_11(ape_ctx->fileversion,0,decoded0,count);
183 apply_filter_64_11(ape_ctx->fileversion,1,decoded1,count);
184 break;
185
186 case 4000:
187 apply_filter_32_10(ape_ctx->fileversion,0,decoded0,count);
188 apply_filter_32_10(ape_ctx->fileversion,1,decoded1,count);
189 apply_filter_256_13(ape_ctx->fileversion,0,decoded0,count);
190 apply_filter_256_13(ape_ctx->fileversion,1,decoded1,count);
191 break;
192
193 case 5000:
194 apply_filter_16_11(ape_ctx->fileversion,0,decoded0,count);
195 apply_filter_16_11(ape_ctx->fileversion,1,decoded1,count);
196 apply_filter_256_13(ape_ctx->fileversion,0,decoded0,count);
197 apply_filter_256_13(ape_ctx->fileversion,1,decoded1,count);
198 apply_filter_1280_15(ape_ctx->fileversion,0,decoded0,count);
199 apply_filter_1280_15(ape_ctx->fileversion,1,decoded1,count);
200 }
201
202 /* Now apply the predictor decoding */
203 predictor_decode_stereo(&ape_ctx->predictor,decoded0,decoded1,count);
204
205 /* Decorrelate and scale to output depth */
206 while (count--)
207 {
208 left = *decoded1 - (*decoded0 / 2);
209 right = left + *decoded0;
210
211 *(decoded0++) = SCALE(left);
212 *(decoded1++) = SCALE(right);
213 }
214 }
215 return 0;
216}
diff --git a/lib/rbcodec/codecs/demac/libdemac/decoder.h b/lib/rbcodec/codecs/demac/libdemac/decoder.h
new file mode 100644
index 0000000000..aeac569509
--- /dev/null
+++ b/lib/rbcodec/codecs/demac/libdemac/decoder.h
@@ -0,0 +1,40 @@
1/*
2
3libdemac - A Monkey's Audio decoder
4
5$Id$
6
7Copyright (C) Dave Chapman 2007
8
9This program is free software; you can redistribute it and/or modify
10it under the terms of the GNU General Public License as published by
11the Free Software Foundation; either version 2 of the License, or
12(at your option) any later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
22
23*/
24
25#ifndef _APE_DECODER_H
26#define _APE_DECODER_H
27
28#include <inttypes.h>
29#include "parser.h"
30
/* Prepare for decoding a new frame: initialises the entropy decoder, the
   predictor and the filters selected by ape_ctx->compressiontype.
   firstbyte and bytesconsumed are updated with the new input position. */
31void init_frame_decoder(struct ape_ctx_t* ape_ctx,
32 unsigned char* inbuffer, int* firstbyte,
33 int* bytesconsumed);
34
/* Decode `count` samples per channel into decoded0 (and decoded1 for
   two-channel output). firstbyte and bytesconsumed are updated with the new
   input position. The implementation in decoder.c always returns 0. */
35int decode_chunk(struct ape_ctx_t* ape_ctx,
36 unsigned char* inbuffer, int* firstbyte,
37 int* bytesconsumed,
38 int32_t* decoded0, int32_t* decoded1,
39 int count);
40#endif
diff --git a/lib/rbcodec/codecs/demac/libdemac/demac.h b/lib/rbcodec/codecs/demac/libdemac/demac.h
new file mode 100644
index 0000000000..696b2aba73
--- /dev/null
+++ b/lib/rbcodec/codecs/demac/libdemac/demac.h
@@ -0,0 +1,45 @@
1/*
2
3libdemac - A Monkey's Audio decoder
4
5$Id$
6
7Copyright (C) Dave Chapman 2007
8
9This program is free software; you can redistribute it and/or modify
10it under the terms of the GNU General Public License as published by
11the Free Software Foundation; either version 2 of the License, or
12(at your option) any later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
22
23*/
24
25#ifndef _APE_DECODER_H
26#define _APE_DECODER_H
27
28#include <inttypes.h>
29#include "parser.h"
30
31void init_frame_decoder(struct ape_ctx_t* ape_ctx,
32 unsigned char* inbuffer, int* firstbyte,
33 int* bytesconsumed);
34
35int decode_chunk(struct ape_ctx_t* ape_ctx,
36 unsigned char* inbuffer, int* firstbyte,
37 int* bytesconsumed,
38 int32_t* decoded0, int32_t* decoded1,
39 int count);
40
41uint32_t ape_initcrc(void);
42uint32_t ape_updatecrc(unsigned char *block, int count, uint32_t crc);
43uint32_t ape_finishcrc(uint32_t crc);
44
45#endif
diff --git a/lib/rbcodec/codecs/demac/libdemac/demac_config.h b/lib/rbcodec/codecs/demac/libdemac/demac_config.h
new file mode 100644
index 0000000000..fa4f008036
--- /dev/null
+++ b/lib/rbcodec/codecs/demac/libdemac/demac_config.h
@@ -0,0 +1,145 @@
1/*
2
3libdemac - A Monkey's Audio decoder
4
5$Id$
6
7Copyright (C) Dave Chapman 2007
8
9This program is free software; you can redistribute it and/or modify
10it under the terms of the GNU General Public License as published by
11the Free Software Foundation; either version 2 of the License, or
12(at your option) any later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
22
23*/
24
25#ifndef _DEMAC_CONFIG_H
26#define _DEMAC_CONFIG_H
27
28/* Build-time choices for libdemac.
29 * Note that this file is included by both .c and .S files. */
30
31#ifdef ROCKBOX
32
33#include "config.h"
34
35#ifndef __ASSEMBLER__
36#include "codeclib.h"
37#include <codecs.h>
38#endif
39
40#define APE_OUTPUT_DEPTH 29
41
42/* On ARMv4, using 32 bit ints for the filters is faster. */
43#if defined(CPU_ARM) && (ARM_ARCH == 4)
44#define FILTER_BITS 32
45#endif
46
47#if !defined(CPU_PP) && !defined(CPU_S5L870X)
48#define FILTER256_IRAM
49#endif
50
51#if CONFIG_CPU == PP5002 || defined(CPU_S5L870X)
52/* Code and data IRAM for speed (PP5002 has a broken cache), not enough IRAM
53 * for the insane filter buffer. Reciprocal table for division in IRAM. */
54#define ICODE_SECTION_DEMAC_ARM .icode
55#define ICODE_ATTR_DEMAC ICODE_ATTR
56#define ICONST_ATTR_DEMAC ICONST_ATTR
57#define IBSS_ATTR_DEMAC IBSS_ATTR
58#define IBSS_ATTR_DEMAC_INSANEBUF
59
60#elif CONFIG_CPU == PP5020
61/* Code and small data in DRAM for speed (PP5020 IRAM isn't completely single
62 * cycle). Insane filter buffer not in IRAM in favour of reciprocal table for
63 * division. Decoded data buffers should be in IRAM (defined by the caller). */
64#define ICODE_SECTION_DEMAC_ARM .text
65#define ICODE_ATTR_DEMAC
66#define ICONST_ATTR_DEMAC
67#define IBSS_ATTR_DEMAC
68#define IBSS_ATTR_DEMAC_INSANEBUF
69
70#elif CONFIG_CPU == PP5022
71/* Code in DRAM, data in IRAM. Insane filter buffer not in IRAM in favour of
72 * reciprocal table for division */
73#define ICODE_SECTION_DEMAC_ARM .text
74#define ICODE_ATTR_DEMAC
75#define ICONST_ATTR_DEMAC ICONST_ATTR
76#define IBSS_ATTR_DEMAC IBSS_ATTR
77#define IBSS_ATTR_DEMAC_INSANEBUF
78
79#else
80/* Code in DRAM, data in IRAM, including insane filter buffer. */
81#define ICODE_SECTION_DEMAC_ARM .text
82#define ICODE_ATTR_DEMAC
83#define ICONST_ATTR_DEMAC ICONST_ATTR
84#define IBSS_ATTR_DEMAC IBSS_ATTR
85#define IBSS_ATTR_DEMAC_INSANEBUF IBSS_ATTR
86#endif
87
88#else /* !ROCKBOX */
89
90#define APE_OUTPUT_DEPTH (ape_ctx->bps)
91
92#define MEM_ALIGN_ATTR __attribute__((aligned(16)))
93 /* adjust to target architecture for best performance */
94
95#define ICODE_ATTR_DEMAC
96#define ICONST_ATTR_DEMAC
97#define IBSS_ATTR_DEMAC
98#define IBSS_ATTR_DEMAC_INSANEBUF
99
100/* Use to give gcc hints on which branch is most likely taken */
101#if defined(__GNUC__) && __GNUC__ >= 3
102#define LIKELY(x) __builtin_expect(!!(x), 1)
103#define UNLIKELY(x) __builtin_expect(!!(x), 0)
104#else
105#define LIKELY(x) (x)
106#define UNLIKELY(x) (x)
107#endif
108
109#endif /* !ROCKBOX */
110
111/* Defaults */
112
113#ifndef FILTER_HISTORY_SIZE
114#define FILTER_HISTORY_SIZE 512
115#endif
116
117#ifndef PREDICTOR_HISTORY_SIZE
118#define PREDICTOR_HISTORY_SIZE 512
119#endif
120
121#ifndef FILTER_BITS
122#define FILTER_BITS 16
123#endif
124
125
#ifndef __ASSEMBLER__

/* UDIV32(a, b): unsigned 32-bit division a / b. */
#if defined(CPU_ARM) && (ARM_ARCH < 5 || defined(USE_IRAM))
/* optimised unsigned integer division for ARMv4, in IRAM */
unsigned udiv32_arm(unsigned a, unsigned b);
#define UDIV32(a, b) udiv32_arm(a, b)
#else
/* default - arguments are parenthesised so that expression arguments such
   as UDIV32(x + 1, y + 1) divide the whole operands */
#define UDIV32(a, b) ((a) / (b))
#endif

/* filter_int: element type of the filter buffers, chosen by FILTER_BITS */
#include <inttypes.h>
#if FILTER_BITS == 32
typedef int32_t filter_int;
#elif FILTER_BITS == 16
typedef int16_t filter_int;
#endif
#endif
144
145#endif /* _DEMAC_CONFIG_H */
diff --git a/lib/rbcodec/codecs/demac/libdemac/entropy.c b/lib/rbcodec/codecs/demac/libdemac/entropy.c
new file mode 100644
index 0000000000..1cef979808
--- /dev/null
+++ b/lib/rbcodec/codecs/demac/libdemac/entropy.c
@@ -0,0 +1,464 @@
1/*
2
3libdemac - A Monkey's Audio decoder
4
5$Id$
6
7Copyright (C) Dave Chapman 2007
8
9This program is free software; you can redistribute it and/or modify
10it under the terms of the GNU General Public License as published by
11the Free Software Foundation; either version 2 of the License, or
12(at your option) any later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
22
23*/
24
25#include <inttypes.h>
26#include <string.h>
27
28#include "parser.h"
29#include "entropy.h"
30#include "demac_config.h"
31
32#define MODEL_ELEMENTS 64
33
34/*
35 The following counts arrays for use with the range decoder are
36 hard-coded in the Monkey's Audio decoder.
37*/
38
/* Cumulative counts searched by range_get_symbol_3970(): 65 entries
   (MODEL_ELEMENTS + 1) rising from 0 to 65536 (1 << 16). */
39static const int counts_3970[65] ICONST_ATTR_DEMAC =
40{
41 0,14824,28224,39348,47855,53994,58171,60926,
42 62682,63786,64463,64878,65126,65276,65365,65419,
43 65450,65469,65480,65487,65491,65493,65494,65495,
44 65496,65497,65498,65499,65500,65501,65502,65503,
45 65504,65505,65506,65507,65508,65509,65510,65511,
46 65512,65513,65514,65515,65516,65517,65518,65519,
47 65520,65521,65522,65523,65524,65525,65526,65527,
48 65528,65529,65530,65531,65532,65533,65534,65535,
49 65536
50};
51
52/* counts_diff_3970[i] = counts_3970[i+1] - counts_3970[i] */
53static const int counts_diff_3970[64] ICONST_ATTR_DEMAC =
54{
55 14824,13400,11124,8507,6139,4177,2755,1756,
56 1104,677,415,248,150,89,54,31,
57 19,11,7,4,2,1,1,1,
58 1,1,1,1,1,1,1,1,
59 1,1,1,1,1,1,1,1,
60 1,1,1,1,1,1,1,1,
61 1,1,1,1,1,1,1,1,
62 1,1,1,1,1,1,1,1
63};
64
/* Cumulative counts searched by range_get_symbol_3980(): 65 entries
   (MODEL_ELEMENTS + 1) rising from 0 to 65536 (1 << 16). */
65static const int counts_3980[65] ICONST_ATTR_DEMAC =
66{
67 0,19578,36160,48417,56323,60899,63265,64435,
68 64971,65232,65351,65416,65447,65466,65476,65482,
69 65485,65488,65490,65491,65492,65493,65494,65495,
70 65496,65497,65498,65499,65500,65501,65502,65503,
71 65504,65505,65506,65507,65508,65509,65510,65511,
72 65512,65513,65514,65515,65516,65517,65518,65519,
73 65520,65521,65522,65523,65524,65525,65526,65527,
74 65528,65529,65530,65531,65532,65533,65534,65535,
75 65536
76};
77
78/* counts_diff_3980[i] = counts_3980[i+1] - counts_3980[i] */
79
80static const int counts_diff_3980[64] ICONST_ATTR_DEMAC =
81{
82 19578,16582,12257,7906,4576,2366,1170,536,
83 261,119,65,31,19,10,6,3,
84 3,2,1,1,1,1,1,1,
85 1,1,1,1,1,1,1,1,
86 1,1,1,1,1,1,1,1,
87 1,1,1,1,1,1,1,1,
88 1,1,1,1,1,1,1,1,
89 1,1,1,1,1,1,1,1
90};
91
92/*
93
94Range decoder adapted from rangecod.c included in:
95
96 http://www.compressconsult.com/rangecoder/rngcod13.zip
97
98 rangecod.c range encoding
99
100 (c) Michael Schindler
101 1997, 1998, 1999, 2000
102 http://www.compressconsult.com/
103 michael@compressconsult.com
104
105 This program is free software; you can redistribute it and/or modify
106 it under the terms of the GNU General Public License as published by
107 the Free Software Foundation; either version 2 of the License, or
108 (at your option) any later version.
109
110
111The encoding functions were removed, and functions turned into "static
112inline" functions. Some minor cosmetic changes were made (e.g. turning
113pre-processor symbols into upper-case, removing the rc parameter from
114each function (and the RNGC macro)).
115
116*/
117
118/* BITSTREAM READING FUNCTIONS */
119
120/* We deal with the input data one byte at a time - to ensure
121 functionality on CPUs of any endianness regardless of any requirements
122 for aligned reads.
123*/
124
125static unsigned char* bytebuffer IBSS_ATTR_DEMAC;
126static int bytebufferoffset IBSS_ATTR_DEMAC;
127
128static inline void skip_byte(void)
129{
130 bytebufferoffset--;
131 bytebuffer += bytebufferoffset & 4;
132 bytebufferoffset &= 3;
133}
134
135static inline int read_byte(void)
136{
137 int ch = bytebuffer[bytebufferoffset];
138
139 skip_byte();
140
141 return ch;
142}
143
144/* RANGE DECODING FUNCTIONS */
145
146/* SIZE OF RANGE ENCODING CODE VALUES. */
147
/* With CODE_BITS == 32 the derived constants evaluate to:
   TOP_VALUE = 0x80000000, SHIFT_BITS = 23, EXTRA_BITS = 7,
   BOTTOM_VALUE = 0x00800000. */
148#define CODE_BITS 32
149#define TOP_VALUE ((unsigned int)1 << (CODE_BITS-1))
150#define SHIFT_BITS (CODE_BITS - 9)
151#define EXTRA_BITS ((CODE_BITS-2) % 8 + 1)
152#define BOTTOM_VALUE (TOP_VALUE >> 8)
153
/* State of the range decoder. In this decoder `help` holds the quotient
   computed by range_decode_culfreq()/range_decode_culshift() and is reused
   by range_decode_update(). */
154struct rangecoder_t
155{
156 uint32_t low; /* low end of interval */
157 uint32_t range; /* length of interval */
158 uint32_t help; /* bytes_to_follow resp. intermediate value */
159 unsigned int buffer; /* buffer for input/output */
160};
161
/* The single decoder instance used by all range_* functions in this file */
162static struct rangecoder_t rc IBSS_ATTR_DEMAC;
163
164/* Start the decoder */
165static inline void range_start_decoding(void)
166{
167 rc.buffer = read_byte();
168 rc.low = rc.buffer >> (8 - EXTRA_BITS);
169 rc.range = (uint32_t) 1 << EXTRA_BITS;
170}
171
172static inline void range_dec_normalize(void)
173{
174 while (rc.range <= BOTTOM_VALUE)
175 {
176 rc.buffer = (rc.buffer << 8) | read_byte();
177 rc.low = (rc.low << 8) | ((rc.buffer >> 1) & 0xff);
178 rc.range <<= 8;
179 }
180}
181
182/* Calculate culmulative frequency for next symbol. Does NO update!*/
183/* tot_f is the total frequency */
184/* or: totf is (code_value)1<<shift */
185/* returns the culmulative frequency */
186static inline int range_decode_culfreq(int tot_f)
187{
188 range_dec_normalize();
189 rc.help = UDIV32(rc.range, tot_f);
190 return UDIV32(rc.low, rc.help);
191}
192
193static inline int range_decode_culshift(int shift)
194{
195 range_dec_normalize();
196 rc.help = rc.range >> shift;
197 return UDIV32(rc.low, rc.help);
198}
199
200
201/* Update decoding state */
202/* sy_f is the interval length (frequency of the symbol) */
203/* lt_f is the lower end (frequency sum of < symbols) */
204static inline void range_decode_update(int sy_f, int lt_f)
205{
206 rc.low -= rc.help * lt_f;
207 rc.range = rc.help * sy_f;
208}
209
210
/* Decode a byte without modelling */
static inline unsigned char decode_byte(void)
{
    const int value = range_decode_culshift(8);

    range_decode_update(1, value);

    return value;
}
217
/* Decode a 16-bit value without modelling */
static inline unsigned short range_decode_short(void)
{
    const int value = range_decode_culshift(16);

    range_decode_update(1, value);

    return value;
}
223
/* Decode n bits (n <= 16) without modelling - based on range_decode_short */
static inline int range_decode_bits(int n)
{
    const int value = range_decode_culshift(n);

    range_decode_update(1, value);

    return value;
}
230
231
/* Finish decoding: one last normalisation so that all bytes are used up */
static inline void range_done_decoding(void)
{
    range_dec_normalize();
}
236
237/*
238 range_get_symbol_* functions based on main decoding loop in simple_d.c from
239 http://www.compressconsult.com/rangecoder/rngcod13.zip
240 (c) Michael Schindler
241*/
242
243static inline int range_get_symbol_3980(void)
244{
245 int symbol, cf;
246
247 cf = range_decode_culshift(16);
248
249 /* figure out the symbol inefficiently; a binary search would be much better */
250 for (symbol = 0; counts_3980[symbol+1] <= cf; symbol++);
251
252 range_decode_update(counts_diff_3980[symbol],counts_3980[symbol]);
253
254 return symbol;
255}
256
257static inline int range_get_symbol_3970(void)
258{
259 int symbol, cf;
260
261 cf = range_decode_culshift(16);
262
263 /* figure out the symbol inefficiently; a binary search would be much better */
264 for (symbol = 0; counts_3970[symbol+1] <= cf; symbol++);
265
266 range_decode_update(counts_diff_3970[symbol],counts_3970[symbol]);
267
268 return symbol;
269}
270
271/* MAIN DECODING FUNCTIONS */
272
/* Adaptive state for one channel: update_rice() adjusts both fields after
   every decoded value. */
273struct rice_t
274{
275 uint32_t k;
276 uint32_t ksum;
277};
278
/* entropy_decode() uses riceY for decoded0 and riceX for decoded1 */
279static struct rice_t riceX IBSS_ATTR_DEMAC;
280static struct rice_t riceY IBSS_ATTR_DEMAC;
281
282static inline void update_rice(struct rice_t* rice, int x)
283{
284 rice->ksum += ((x + 1) / 2) - ((rice->ksum + 16) >> 5);
285
286 if (UNLIKELY(rice->k == 0)) {
287 rice->k = 1;
288 } else {
289 uint32_t lim = 1 << (rice->k + 4);
290 if (UNLIKELY(rice->ksum < lim)) {
291 rice->k--;
292 } else if (UNLIKELY(rice->ksum >= 2 * lim)) {
293 rice->k++;
294 }
295 }
296}
297
298static inline int entropy_decode3980(struct rice_t* rice)
299{
300 int base, x, pivot, overflow;
301
302 pivot = rice->ksum >> 5;
303 if (UNLIKELY(pivot == 0))
304 pivot=1;
305
306 overflow = range_get_symbol_3980();
307
308 if (UNLIKELY(overflow == (MODEL_ELEMENTS-1))) {
309 overflow = range_decode_short() << 16;
310 overflow |= range_decode_short();
311 }
312
313 if (pivot >= 0x10000) {
314 /* Codepath for 24-bit streams */
315 int nbits, lo_bits, base_hi, base_lo;
316
317 /* Count the number of bits in pivot */
318 nbits = 17; /* We know there must be at least 17 bits */
319 while ((pivot >> nbits) > 0) { nbits++; }
320
321 /* base_lo is the low (nbits-16) bits of base
322 base_hi is the high 16 bits of base
323 */
324 lo_bits = (nbits - 16);
325
326 base_hi = range_decode_culfreq((pivot >> lo_bits) + 1);
327 range_decode_update(1, base_hi);
328
329 base_lo = range_decode_culshift(lo_bits);
330 range_decode_update(1, base_lo);
331
332 base = (base_hi << lo_bits) + base_lo;
333 } else {
334 /* Codepath for 16-bit streams */
335 base = range_decode_culfreq(pivot);
336 range_decode_update(1, base);
337 }
338
339 x = base + (overflow * pivot);
340 update_rice(rice, x);
341
342 /* Convert to signed */
343 if (x & 1)
344 return (x >> 1) + 1;
345 else
346 return -(x >> 1);
347}
348
349
350static inline int entropy_decode3970(struct rice_t* rice)
351{
352 int x, tmpk;
353
354 int overflow = range_get_symbol_3970();
355
356 if (UNLIKELY(overflow == (MODEL_ELEMENTS - 1))) {
357 tmpk = range_decode_bits(5);
358 overflow = 0;
359 } else {
360 tmpk = (rice->k < 1) ? 0 : rice->k - 1;
361 }
362
363 if (tmpk <= 16) {
364 x = range_decode_bits(tmpk);
365 } else {
366 x = range_decode_short();
367 x |= (range_decode_bits(tmpk - 16) << 16);
368 }
369 x += (overflow << tmpk);
370
371 update_rice(rice, x);
372
373 /* Convert to signed */
374 if (x & 1)
375 return (x >> 1) + 1;
376 else
377 return -(x >> 1);
378}
379
380void init_entropy_decoder(struct ape_ctx_t* ape_ctx,
381 unsigned char* inbuffer, int* firstbyte,
382 int* bytesconsumed)
383{
384 bytebuffer = inbuffer;
385 bytebufferoffset = *firstbyte;
386
387 /* Read the CRC */
388 ape_ctx->CRC = read_byte();
389 ape_ctx->CRC = (ape_ctx->CRC << 8) | read_byte();
390 ape_ctx->CRC = (ape_ctx->CRC << 8) | read_byte();
391 ape_ctx->CRC = (ape_ctx->CRC << 8) | read_byte();
392
393 /* Read the frame flags if they exist */
394 ape_ctx->frameflags = 0;
395 if ((ape_ctx->fileversion > 3820) && (ape_ctx->CRC & 0x80000000)) {
396 ape_ctx->CRC &= ~0x80000000;
397
398 ape_ctx->frameflags = read_byte();
399 ape_ctx->frameflags = (ape_ctx->frameflags << 8) | read_byte();
400 ape_ctx->frameflags = (ape_ctx->frameflags << 8) | read_byte();
401 ape_ctx->frameflags = (ape_ctx->frameflags << 8) | read_byte();
402 }
403 /* Keep a count of the blocks decoded in this frame */
404 ape_ctx->blocksdecoded = 0;
405
406 /* Initialise the rice structs */
407 riceX.k = 10;
408 riceX.ksum = (1 << riceX.k) * 16;
409 riceY.k = 10;
410 riceY.ksum = (1 << riceY.k) * 16;
411
412 /* The first 8 bits of input are ignored. */
413 skip_byte();
414
415 range_start_decoding();
416
417 /* Return the new state of the buffer */
418 *bytesconsumed = (intptr_t)bytebuffer - (intptr_t)inbuffer;
419 *firstbyte = bytebufferoffset;
420}
421
422void ICODE_ATTR_DEMAC entropy_decode(struct ape_ctx_t* ape_ctx,
423 unsigned char* inbuffer, int* firstbyte,
424 int* bytesconsumed,
425 int32_t* decoded0, int32_t* decoded1,
426 int blockstodecode)
427{
428 bytebuffer = inbuffer;
429 bytebufferoffset = *firstbyte;
430
431 ape_ctx->blocksdecoded += blockstodecode;
432
433 if ((ape_ctx->frameflags & APE_FRAMECODE_LEFT_SILENCE)
434 && ((ape_ctx->frameflags & APE_FRAMECODE_RIGHT_SILENCE)
435 || (decoded1 == NULL))) {
436 /* We are pure silence, just memset the output buffer. */
437 memset(decoded0, 0, blockstodecode * sizeof(int32_t));
438 if (decoded1 != NULL)
439 memset(decoded1, 0, blockstodecode * sizeof(int32_t));
440 } else {
441 if (ape_ctx->fileversion > 3970) {
442 while (LIKELY(blockstodecode--)) {
443 *(decoded0++) = entropy_decode3980(&riceY);
444 if (decoded1 != NULL)
445 *(decoded1++) = entropy_decode3980(&riceX);
446 }
447 } else {
448 while (LIKELY(blockstodecode--)) {
449 *(decoded0++) = entropy_decode3970(&riceY);
450 if (decoded1 != NULL)
451 *(decoded1++) = entropy_decode3970(&riceX);
452 }
453 }
454 }
455
456 if (ape_ctx->blocksdecoded == ape_ctx->currentframeblocks)
457 {
458 range_done_decoding();
459 }
460
461 /* Return the new state of the buffer */
462 *bytesconsumed = bytebuffer - inbuffer;
463 *firstbyte = bytebufferoffset;
464}
diff --git a/lib/rbcodec/codecs/demac/libdemac/entropy.h b/lib/rbcodec/codecs/demac/libdemac/entropy.h
new file mode 100644
index 0000000000..fac2a44d99
--- /dev/null
+++ b/lib/rbcodec/codecs/demac/libdemac/entropy.h
@@ -0,0 +1,40 @@
1/*
2
3libdemac - A Monkey's Audio decoder
4
5$Id$
6
7Copyright (C) Dave Chapman 2007
8
9This program is free software; you can redistribute it and/or modify
10it under the terms of the GNU General Public License as published by
11the Free Software Foundation; either version 2 of the License, or
12(at your option) any later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
22
23*/
24
25#ifndef _APE_ENTROPY_H
26#define _APE_ENTROPY_H
27
28#include <inttypes.h>
29
/* Prepare the entropy decoder for a new frame whose data starts at
   inbuffer.  *bytesconsumed and *firstbyte are written with the buffer
   position reached after initialisation. */
void init_entropy_decoder(struct ape_ctx_t* ape_ctx,
                          unsigned char* inbuffer, int* firstbyte,
                          int* bytesconsumed);

/* Decode blockstodecode values into decoded0 (and decoded1, which may be
   NULL).  *firstbyte is read as the starting offset and, together with
   *bytesconsumed, updated with the position reached on return. */
void entropy_decode(struct ape_ctx_t* ape_ctx,
                    unsigned char* inbuffer, int* firstbyte,
                    int* bytesconsumed,
                    int32_t* decoded0, int32_t* decoded1,
                    int blockstodecode);
39
40#endif
diff --git a/lib/rbcodec/codecs/demac/libdemac/filter.c b/lib/rbcodec/codecs/demac/libdemac/filter.c
new file mode 100644
index 0000000000..903885cf00
--- /dev/null
+++ b/lib/rbcodec/codecs/demac/libdemac/filter.c
@@ -0,0 +1,296 @@
1/*
2
3libdemac - A Monkey's Audio decoder
4
5$Id$
6
7Copyright (C) Dave Chapman 2007
8
9This program is free software; you can redistribute it and/or modify
10it under the terms of the GNU General Public License as published by
11the Free Software Foundation; either version 2 of the License, or
12(at your option) any later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
22
23*/
24
25#include <string.h>
26#include <inttypes.h>
27
28#include "demac.h"
29#include "filter.h"
30#include "demac_config.h"
31
32#if FILTER_BITS == 32
33
34#if defined(CPU_ARM) && (ARM_ARCH == 4)
35#include "vector_math32_armv4.h"
36#else
37#include "vector_math_generic.h"
38#endif
39
40#else /* FILTER_BITS == 16 */
41
42#ifdef CPU_COLDFIRE
43#include "vector_math16_cf.h"
44#elif defined(CPU_ARM) && (ARM_ARCH >= 7)
45#include "vector_math16_armv7.h"
46#elif defined(CPU_ARM) && (ARM_ARCH >= 6)
47#include "vector_math16_armv6.h"
48#elif defined(CPU_ARM) && (ARM_ARCH >= 5)
49/* Assume all our ARMv5 targets are ARMv5te(j) */
50#include "vector_math16_armv5te.h"
51#elif (defined(__i386__) || defined(__i486__)) && defined(__MMX__) \
52 || defined(__x86_64__)
53#include "vector_math16_mmx.h"
54#else
55#include "vector_math_generic.h"
56#endif
57
58#endif /* FILTER_BITS */
59
/* State of one adaptive filter instance (one per channel).

   All pointers refer to a single caller-supplied buffer.  do_init_filter()
   places coeffs at the start, adaptcoeffs at coeffs + ORDER*2 and delay at
   coeffs + ORDER*3; adaptcoeffs and delay then advance together, so
   adaptcoeffs always trails delay by ORDER entries. */
struct filter_t {
    filter_int* coeffs; /* ORDER entries */

    /* We store all the filter delays in a single buffer */
    filter_int* history_end; /* One past the last usable entry; reaching it
                                triggers the move back to the buffer start */

    filter_int* delay;       /* Next slot for a (saturated) output sample */
    filter_int* adaptcoeffs; /* Next slot for an adaption coefficient */

    int avg; /* Running average of |output|, used by the 3.98+ filter */
};
71
72/* We name the functions according to the ORDER and FRACBITS
73 pre-processor symbols and build multiple .o files from this .c file
74 - this increases code-size but gives the compiler more scope for
75 optimising the individual functions, as well as replacing a lot of
76 variables with constants.
77*/
78
79#if FRACBITS == 11
80 #if ORDER == 16
81 #define INIT_FILTER init_filter_16_11
82 #define APPLY_FILTER apply_filter_16_11
83 #elif ORDER == 64
84 #define INIT_FILTER init_filter_64_11
85 #define APPLY_FILTER apply_filter_64_11
86 #endif
87#elif FRACBITS == 13
88 #define INIT_FILTER init_filter_256_13
89 #define APPLY_FILTER apply_filter_256_13
90#elif FRACBITS == 10
91 #define INIT_FILTER init_filter_32_10
92 #define APPLY_FILTER apply_filter_32_10
93#elif FRACBITS == 15
94 #define INIT_FILTER init_filter_1280_15
95 #define APPLY_FILTER apply_filter_1280_15
96#endif
97
98/* Some macros to handle the fixed-point stuff */
99
/* Convert from (32-FRACBITS).FRACBITS fixed-point format to an
   integer (rounding to nearest).  The argument is parenthesised so that
   expressions with low-precedence operators (e.g. a | b) are rounded as a
   whole rather than having FP_HALF bind to their last operand. */
#define FP_HALF (1 << (FRACBITS - 1)) /* 0.5 in fixed-point format. */
#define FP_TO_INT(x) (((x) + FP_HALF) >> FRACBITS) /* round(x) */
104
/* SATURATE(x): clamp x to the signed 16-bit range.  Three variants follow:
   a single ssat instruction for ARMv6+, a conditional-execution sequence
   for older ARM, and a portable C expression for everything else. */
#ifdef CPU_ARM
#if ARM_ARCH >= 6
#define SATURATE(x) ({int __res; asm("ssat %0, #16, %1" : "=r"(__res) : "r"(x)); __res; })
#else /* ARM_ARCH < 6 */
/* Keeping the asr #31 outside of the asm allows loads to be scheduled between
   it and the rest of the block on ARM9E, with the load's result latency filled
   by the other calculations. */
/* __res starts as x >> 31; if that equals x >> 15 the value already fits and
   x is used unchanged, otherwise __res is XORed with 0x7fff (in two
   immediates) to form the saturated value. */
#define SATURATE(x) ({ \
    int __res = (x) >> 31; \
    asm volatile ( \
        "teq %0, %1, asr #15\n\t" \
        "moveq %0, %1\n\t" \
        "eorne %0, %0, #0xff\n\t" \
        "eorne %0, %0, #0x7f00" \
        : "+r" (__res) : "r" (x) : "cc" \
    ); \
    __res; \
})
#endif /* ARM_ARCH */
#else /* CPU_ARM */
/* x is returned unchanged when it survives a round trip through int16_t;
   otherwise (x >> 31) ^ 0x7FFF selects the limit on x's side of zero.
   Note that x is evaluated more than once. */
#define SATURATE(x) (LIKELY((x) == (int16_t)(x)) ? (x) : ((x) >> 31) ^ 0x7FFF)
#endif
127
128/* Apply the filter with state f to count entries in data[] */
129
/* Filter used for file versions >= 3980 (see APPLY_FILTER).

   Processes count samples of data[] in place.  For each sample the rounded
   scalar product of the coefficients with the last ORDER history entries is
   added to the input, the coefficients are adapted according to the sign of
   the input, and the history/adaption buffers are advanced. */
static void ICODE_ATTR_DEMAC do_apply_filter_3980(struct filter_t* f,
                                                  int32_t* data, int count)
{
    int res;
    int absres;

#ifdef PREPARE_SCALARPRODUCT
    PREPARE_SCALARPRODUCT
#endif

    while(LIKELY(count--))
    {
#ifdef FUSED_VECTOR_MATH
        /* NOTE(review): vector_sp_add/vector_sp_sub come from the
           vector_math headers; presumably they combine the scalar product
           with the coefficient update done separately in the #else branch
           - confirm against those headers. */
        if (LIKELY(*data != 0)) {
            if (*data < 0)
                res = vector_sp_add(f->coeffs, f->delay - ORDER,
                                    f->adaptcoeffs - ORDER);
            else
                res = vector_sp_sub(f->coeffs, f->delay - ORDER,
                                    f->adaptcoeffs - ORDER);
        } else {
            res = scalarproduct(f->coeffs, f->delay - ORDER);
        }
        res = FP_TO_INT(res);
#else
        /* Prediction from the last ORDER history entries, rounded */
        res = FP_TO_INT(scalarproduct(f->coeffs, f->delay - ORDER));

        /* Adapt the coefficients using the last ORDER adaption values;
           the direction depends on the sign of the input, and a zero
           input leaves the coefficients untouched */
        if (LIKELY(*data != 0)) {
            if (*data < 0)
                vector_add(f->coeffs, f->adaptcoeffs - ORDER);
            else
                vector_sub(f->coeffs, f->adaptcoeffs - ORDER);
        }
#endif

        /* Output = prediction + input, written back in place */
        res += *data;

        *data++ = res;

        /* Update the output history */
        *f->delay++ = SATURATE(res);

        /* Version 3.98 and later files */

        /* Update the adaption coefficients */
        absres = (res < 0 ? -res : res);

        /* The magnitude (32, 16 or 8) is picked by comparing |res| with the
           running average.  Each expression isolates bit 31 of res, giving
           the positive value when that bit is set (res negative for a
           32-bit int) and the negative value otherwise. */
        if (UNLIKELY(absres > 3 * f->avg))
            *f->adaptcoeffs = ((res >> 25) & 64) - 32;
        else if (3 * absres > 4 * f->avg)
            *f->adaptcoeffs = ((res >> 26) & 32) - 16;
        else if (LIKELY(absres > 0))
            *f->adaptcoeffs = ((res >> 27) & 16) - 8;
        else
            *f->adaptcoeffs = 0;

        /* Move the running average 1/16 of the way towards |res| */
        f->avg += (absres - f->avg) / 16;

        /* Halve selected older adaption values (1, 2 and 8 entries back) */
        f->adaptcoeffs[-1] >>= 1;
        f->adaptcoeffs[-2] >>= 1;
        f->adaptcoeffs[-8] >>= 1;

        f->adaptcoeffs++;

        /* Have we filled the history buffer? */
        if (UNLIKELY(f->delay == f->history_end)) {
            /* Copy the newest ORDER*2 entries (ORDER adaption values
               followed by ORDER history values) back to just after the
               coefficients and reset both pointers to their initial
               positions */
            memmove(f->coeffs + ORDER, f->delay - (ORDER*2),
                    (ORDER*2) * sizeof(filter_int));
            f->adaptcoeffs = f->coeffs + ORDER*2;
            f->delay = f->coeffs + ORDER*3;
        }
    }
}
203
/* Filter used for file versions below 3980 (see APPLY_FILTER).

   Same structure as do_apply_filter_3980, but the adaption value has a
   fixed magnitude of 4 and no running average is kept. */
static void ICODE_ATTR_DEMAC do_apply_filter_3970(struct filter_t* f,
                                                  int32_t* data, int count)
{
    int res;

#ifdef PREPARE_SCALARPRODUCT
    PREPARE_SCALARPRODUCT
#endif

    while(LIKELY(count--))
    {
#ifdef FUSED_VECTOR_MATH
        /* NOTE(review): vector_sp_add/vector_sp_sub come from the
           vector_math headers; presumably they combine the scalar product
           with the coefficient update done separately in the #else branch
           - confirm against those headers. */
        if (LIKELY(*data != 0)) {
            if (*data < 0)
                res = vector_sp_add(f->coeffs, f->delay - ORDER,
                                    f->adaptcoeffs - ORDER);
            else
                res = vector_sp_sub(f->coeffs, f->delay - ORDER,
                                    f->adaptcoeffs - ORDER);
        } else {
            res = scalarproduct(f->coeffs, f->delay - ORDER);
        }
        res = FP_TO_INT(res);
#else
        /* Prediction from the last ORDER history entries, rounded */
        res = FP_TO_INT(scalarproduct(f->coeffs, f->delay - ORDER));

        /* Adapt the coefficients; direction follows the sign of the input */
        if (LIKELY(*data != 0)) {
            if (*data < 0)
                vector_add(f->coeffs, f->adaptcoeffs - ORDER);
            else
                vector_sub(f->coeffs, f->adaptcoeffs - ORDER);
        }
#endif

        /* res has already been converted from fixed-point to an integer
           (rounding to nearest) by FP_TO_INT above; add the input value
           to it */
        res += *data;

        *data++ = res;

        /* Update the output history */
        *f->delay++ = SATURATE(res);

        /* Version ??? to < 3.98 files (untested) */
        /* 0 for a zero result; otherwise bit 31 of res is isolated, giving
           +4 when it is set and -4 when it is clear */
        f->adaptcoeffs[0] = (res == 0) ? 0 : ((res >> 28) & 8) - 4;
        /* Halve the adaption values 4 and 8 entries back */
        f->adaptcoeffs[-4] >>= 1;
        f->adaptcoeffs[-8] >>= 1;

        f->adaptcoeffs++;

        /* Have we filled the history buffer? */
        if (UNLIKELY(f->delay == f->history_end)) {
            /* Copy the newest ORDER*2 entries back to just after the
               coefficients and reset both pointers to their initial
               positions */
            memmove(f->coeffs + ORDER, f->delay - (ORDER*2),
                    (ORDER*2) * sizeof(filter_int));
            f->adaptcoeffs = f->coeffs + ORDER*2;
            f->delay = f->coeffs + ORDER*3;
        }
    }
}
264
265static struct filter_t filter[2] IBSS_ATTR_DEMAC;
266
267static void do_init_filter(struct filter_t* f, filter_int* buf)
268{
269 f->coeffs = buf;
270 f->history_end = buf + ORDER*3 + FILTER_HISTORY_SIZE;
271
272 /* Init pointers */
273 f->adaptcoeffs = f->coeffs + ORDER*2;
274 f->delay = f->coeffs + ORDER*3;
275
276 /* Zero coefficients and history buffer */
277 memset(f->coeffs, 0, ORDER*3 * sizeof(filter_int));
278
279 /* Zero the running average */
280 f->avg = 0;
281}
282
283void INIT_FILTER(filter_int* buf)
284{
285 do_init_filter(&filter[0], buf);
286 do_init_filter(&filter[1], buf + ORDER*3 + FILTER_HISTORY_SIZE);
287}
288
289void ICODE_ATTR_DEMAC APPLY_FILTER(int fileversion, int channel,
290 int32_t* data, int count)
291{
292 if (fileversion >= 3980)
293 do_apply_filter_3980(&filter[channel], data, count);
294 else
295 do_apply_filter_3970(&filter[channel], data, count);
296}
diff --git a/lib/rbcodec/codecs/demac/libdemac/filter.h b/lib/rbcodec/codecs/demac/libdemac/filter.h
new file mode 100644
index 0000000000..609ea12496
--- /dev/null
+++ b/lib/rbcodec/codecs/demac/libdemac/filter.h
@@ -0,0 +1,50 @@
1/*
2
3libdemac - A Monkey's Audio decoder
4
5$Id$
6
7Copyright (C) Dave Chapman 2007
8
9This program is free software; you can redistribute it and/or modify
10it under the terms of the GNU General Public License as published by
11the Free Software Foundation; either version 2 of the License, or
12(at your option) any later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
22
23*/
24
25#ifndef _APE_FILTER_H
26#define _APE_FILTER_H
27
28#include "demac_config.h"
29
/* One init/apply pair per filter configuration; the suffix is
   <ORDER>_<FRACBITS>.  init_* takes the working buffer for both channels,
   apply_* filters count entries of decoded[] in place for the given
   channel, using fileversion to select the algorithm variant. */

void init_filter_16_11(filter_int* buf);
void apply_filter_16_11(int fileversion, int channel,
                        int32_t* decoded, int count);

void init_filter_64_11(filter_int* buf);
void apply_filter_64_11(int fileversion, int channel,
                        int32_t* decoded, int count);

void init_filter_32_10(filter_int* buf);
void apply_filter_32_10(int fileversion, int channel,
                        int32_t* decoded, int count);

void init_filter_256_13(filter_int* buf);
void apply_filter_256_13(int fileversion, int channel,
                         int32_t* decoded, int count);

void init_filter_1280_15(filter_int* buf);
void apply_filter_1280_15(int fileversion, int channel,
                          int32_t* decoded, int count);
49
50#endif
diff --git a/lib/rbcodec/codecs/demac/libdemac/filter_1280_15.c b/lib/rbcodec/codecs/demac/libdemac/filter_1280_15.c
new file mode 100644
index 0000000000..f2301fb02a
--- /dev/null
+++ b/lib/rbcodec/codecs/demac/libdemac/filter_1280_15.c
@@ -0,0 +1,32 @@
1/*
2
3libdemac - A Monkey's Audio decoder
4
5$Id$
6
7Copyright (C) Dave Chapman 2007
8
9This program is free software; you can redistribute it and/or modify
10it under the terms of the GNU General Public License as published by
11the Free Software Foundation; either version 2 of the License, or
12(at your option) any later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
22
23*/
24
25#include "demac_config.h"
26#ifndef FILTER256_IRAM
27#undef ICODE_ATTR_DEMAC
28#define ICODE_ATTR_DEMAC
29#endif
30#define ORDER 1280
31#define FRACBITS 15
32#include "filter.c"
diff --git a/lib/rbcodec/codecs/demac/libdemac/filter_16_11.c b/lib/rbcodec/codecs/demac/libdemac/filter_16_11.c
new file mode 100644
index 0000000000..94c56e247f
--- /dev/null
+++ b/lib/rbcodec/codecs/demac/libdemac/filter_16_11.c
@@ -0,0 +1,27 @@
1/*
2
3libdemac - A Monkey's Audio decoder
4
5$Id$
6
7Copyright (C) Dave Chapman 2007
8
9This program is free software; you can redistribute it and/or modify
10it under the terms of the GNU General Public License as published by
11the Free Software Foundation; either version 2 of the License, or
12(at your option) any later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
22
23*/
24
25#define ORDER 16
26#define FRACBITS 11
27#include "filter.c"
diff --git a/lib/rbcodec/codecs/demac/libdemac/filter_256_13.c b/lib/rbcodec/codecs/demac/libdemac/filter_256_13.c
new file mode 100644
index 0000000000..9e4b9fcb13
--- /dev/null
+++ b/lib/rbcodec/codecs/demac/libdemac/filter_256_13.c
@@ -0,0 +1,32 @@
1/*
2
3libdemac - A Monkey's Audio decoder
4
5$Id$
6
7Copyright (C) Dave Chapman 2007
8
9This program is free software; you can redistribute it and/or modify
10it under the terms of the GNU General Public License as published by
11the Free Software Foundation; either version 2 of the License, or
12(at your option) any later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
22
23*/
24
25#include "demac_config.h"
26#ifndef FILTER256_IRAM
27#undef ICODE_ATTR_DEMAC
28#define ICODE_ATTR_DEMAC
29#endif
30#define ORDER 256
31#define FRACBITS 13
32#include "filter.c"
diff --git a/lib/rbcodec/codecs/demac/libdemac/filter_32_10.c b/lib/rbcodec/codecs/demac/libdemac/filter_32_10.c
new file mode 100644
index 0000000000..5ec85089db
--- /dev/null
+++ b/lib/rbcodec/codecs/demac/libdemac/filter_32_10.c
@@ -0,0 +1,27 @@
1/*
2
3libdemac - A Monkey's Audio decoder
4
5$Id$
6
7Copyright (C) Dave Chapman 2007
8
9This program is free software; you can redistribute it and/or modify
10it under the terms of the GNU General Public License as published by
11the Free Software Foundation; either version 2 of the License, or
12(at your option) any later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
22
23*/
24
25#define ORDER 32
26#define FRACBITS 10
27#include "filter.c"
diff --git a/lib/rbcodec/codecs/demac/libdemac/filter_64_11.c b/lib/rbcodec/codecs/demac/libdemac/filter_64_11.c
new file mode 100644
index 0000000000..cd74fa5f6b
--- /dev/null
+++ b/lib/rbcodec/codecs/demac/libdemac/filter_64_11.c
@@ -0,0 +1,27 @@
1/*
2
3libdemac - A Monkey's Audio decoder
4
5$Id$
6
7Copyright (C) Dave Chapman 2007
8
9This program is free software; you can redistribute it and/or modify
10it under the terms of the GNU General Public License as published by
11the Free Software Foundation; either version 2 of the License, or
12(at your option) any later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
22
23*/
24
25#define ORDER 64
26#define FRACBITS 11
27#include "filter.c"
diff --git a/lib/rbcodec/codecs/demac/libdemac/parser.c b/lib/rbcodec/codecs/demac/libdemac/parser.c
new file mode 100644
index 0000000000..2af4a292b8
--- /dev/null
+++ b/lib/rbcodec/codecs/demac/libdemac/parser.c
@@ -0,0 +1,402 @@
1/*
2
3libdemac - A Monkey's Audio decoder
4
5$Id$
6
7Copyright (C) Dave Chapman 2007
8
9This program is free software; you can redistribute it and/or modify
10it under the terms of the GNU General Public License as published by
11the Free Software Foundation; either version 2 of the License, or
12(at your option) any later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
22
23*/
24
25#include <inttypes.h>
26#include <string.h>
27#ifndef ROCKBOX
28#include <stdio.h>
29#include <stdlib.h>
30#include "inttypes.h"
31#include <sys/stat.h>
32#include <fcntl.h>
33#include <unistd.h>
34#endif
35
36#include "parser.h"
37
38#ifdef APE_MAX
39#undef APE_MAX
40#endif
41#define APE_MAX(a,b) ((a)>(b)?(a):(b))
42
43
/* Read a little-endian 16-bit value from buf and return it as int16_t. */
static inline int16_t get_int16(unsigned char* buf)
{
    const int low = buf[0];
    const int high = buf[1];

    return (int16_t)((high << 8) | low);
}
48
/* Read a little-endian unsigned 16-bit value from buf. */
static inline uint16_t get_uint16(unsigned char* buf)
{
    const int low = buf[0];
    const int high = buf[1];

    return (uint16_t)((high << 8) | low);
}
53
/* Read a little-endian unsigned 32-bit value from buf.

   Each byte is widened to uint32_t before shifting: an unsigned char
   promotes to (signed) int, and shifting a value >= 0x80 left by 24 would
   move a 1 into the sign bit, which is undefined behaviour. */
static inline uint32_t get_uint32(unsigned char* buf)
{
    return (uint32_t)buf[0]
         | ((uint32_t)buf[1] << 8)
         | ((uint32_t)buf[2] << 16)
         | ((uint32_t)buf[3] << 24);
}
58
59
60int ape_parseheaderbuf(unsigned char* buf, struct ape_ctx_t* ape_ctx)
61{
62 unsigned char* header;
63
64 memset(ape_ctx,0,sizeof(struct ape_ctx_t));
65 /* TODO: Skip any leading junk such as id3v2 tags */
66 ape_ctx->junklength = 0;
67
68 memcpy(ape_ctx->magic, buf, 4);
69 if (memcmp(ape_ctx->magic,"MAC ",4)!=0)
70 {
71 return -1;
72 }
73
74 ape_ctx->fileversion = get_int16(buf + 4);
75
76 if (ape_ctx->fileversion >= 3980)
77 {
78 ape_ctx->padding1 = get_int16(buf + 6);
79 ape_ctx->descriptorlength = get_uint32(buf + 8);
80 ape_ctx->headerlength = get_uint32(buf + 12);
81 ape_ctx->seektablelength = get_uint32(buf + 16);
82 ape_ctx->wavheaderlength = get_uint32(buf + 20);
83 ape_ctx->audiodatalength = get_uint32(buf + 24);
84 ape_ctx->audiodatalength_high = get_uint32(buf + 28);
85 ape_ctx->wavtaillength = get_uint32(buf + 32);
86 memcpy(ape_ctx->md5, buf + 36, 16);
87
88 header = buf + ape_ctx->descriptorlength;
89
90 /* Read header data */
91 ape_ctx->compressiontype = get_uint16(header + 0);
92 ape_ctx->formatflags = get_uint16(header + 2);
93 ape_ctx->blocksperframe = get_uint32(header + 4);
94 ape_ctx->finalframeblocks = get_uint32(header + 8);
95 ape_ctx->totalframes = get_uint32(header + 12);
96 ape_ctx->bps = get_uint16(header + 16);
97 ape_ctx->channels = get_uint16(header + 18);
98 ape_ctx->samplerate = get_uint32(header + 20);
99
100 ape_ctx->seektablefilepos = ape_ctx->junklength +
101 ape_ctx->descriptorlength +
102 ape_ctx->headerlength;
103
104 ape_ctx->firstframe = ape_ctx->junklength + ape_ctx->descriptorlength +
105 ape_ctx->headerlength + ape_ctx->seektablelength +
106 ape_ctx->wavheaderlength;
107 } else {
108 ape_ctx->headerlength = 32;
109 ape_ctx->compressiontype = get_uint16(buf + 6);
110 ape_ctx->formatflags = get_uint16(buf + 8);
111 ape_ctx->channels = get_uint16(buf + 10);
112 ape_ctx->samplerate = get_uint32(buf + 12);
113 ape_ctx->wavheaderlength = get_uint32(buf + 16);
114 ape_ctx->totalframes = get_uint32(buf + 24);
115 ape_ctx->finalframeblocks = get_uint32(buf + 28);
116
117 if (ape_ctx->formatflags & MAC_FORMAT_FLAG_HAS_PEAK_LEVEL)
118 {
119 ape_ctx->headerlength += 4;
120 }
121
122 if (ape_ctx->formatflags & MAC_FORMAT_FLAG_HAS_SEEK_ELEMENTS)
123 {
124 ape_ctx->seektablelength = get_uint32(buf + ape_ctx->headerlength);
125 ape_ctx->seektablelength *= sizeof(int32_t);
126 ape_ctx->headerlength += 4;
127 } else {
128 ape_ctx->seektablelength = ape_ctx->totalframes * sizeof(int32_t);
129 }
130
131 if (ape_ctx->formatflags & MAC_FORMAT_FLAG_8_BIT)
132 ape_ctx->bps = 8;
133 else if (ape_ctx->formatflags & MAC_FORMAT_FLAG_24_BIT)
134 ape_ctx->bps = 24;
135 else
136 ape_ctx->bps = 16;
137
138 if (ape_ctx->fileversion >= 3950)
139 ape_ctx->blocksperframe = 73728 * 4;
140 else if ((ape_ctx->fileversion >= 3900) || (ape_ctx->fileversion >= 3800 && ape_ctx->compressiontype >= 4000))
141 ape_ctx->blocksperframe = 73728;
142 else
143 ape_ctx->blocksperframe = 9216;
144
145 ape_ctx->seektablefilepos = ape_ctx->junklength + ape_ctx->headerlength +
146 ape_ctx->wavheaderlength;
147
148 ape_ctx->firstframe = ape_ctx->junklength + ape_ctx->headerlength +
149 ape_ctx->wavheaderlength + ape_ctx->seektablelength;
150 }
151
152 ape_ctx->totalsamples = ape_ctx->finalframeblocks;
153 if (ape_ctx->totalframes > 1)
154 ape_ctx->totalsamples += ape_ctx->blocksperframe * (ape_ctx->totalframes-1);
155
156 ape_ctx->numseekpoints = APE_MAX(ape_ctx->maxseekpoints,
157 ape_ctx->seektablelength / sizeof(int32_t));
158
159 return 0;
160}
161
162
163#ifndef ROCKBOX
164/* Helper functions */
165
/* Read a little-endian unsigned 16-bit value from fd into *x.
   Returns 0 on success, or -1 (leaving *x untouched) if two bytes could
   not be read. */
static int read_uint16(int fd, uint16_t* x)
{
    unsigned char raw[2];

    if (read(fd, raw, 2) != 2)
        return -1;

    *x = (uint16_t)((raw[1] << 8) | raw[0]);
    return 0;
}
180
/* Read a little-endian 16-bit value from fd into the signed *x by reusing
   read_uint16() on the same storage.  Returns 0 on success, -1 on a short
   read. */
static int read_int16(int fd, int16_t* x)
{
    uint16_t* const as_unsigned = (uint16_t*)x;

    return read_uint16(fd, as_unsigned);
}
185
/* Read a little-endian unsigned 32-bit value from fd into *x.
   Returns 0 on success, or -1 (leaving *x untouched) if four bytes could
   not be read.

   Each byte is widened to uint32_t before shifting: an unsigned char
   promotes to (signed) int, and shifting a value >= 0x80 left by 24 would
   move a 1 into the sign bit, which is undefined behaviour. */
static int read_uint32(int fd, uint32_t* x)
{
    unsigned char tmp[4];
    int n;

    n = read(fd,tmp,4);

    if (n != 4)
        return -1;

    *x = (uint32_t)tmp[0]
       | ((uint32_t)tmp[1] << 8)
       | ((uint32_t)tmp[2] << 16)
       | ((uint32_t)tmp[3] << 24);

    return 0;
}
200
201int ape_parseheader(int fd, struct ape_ctx_t* ape_ctx)
202{
203 int i,n;
204
205 /* TODO: Skip any leading junk such as id3v2 tags */
206 ape_ctx->junklength = 0;
207
208 lseek(fd,ape_ctx->junklength,SEEK_SET);
209
210 n = read(fd,&ape_ctx->magic,4);
211 if (n != 4) return -1;
212
213 if (memcmp(ape_ctx->magic,"MAC ",4)!=0)
214 {
215 return -1;
216 }
217
218 if (read_int16(fd,&ape_ctx->fileversion) < 0)
219 return -1;
220
221 if (ape_ctx->fileversion >= 3980)
222 {
223 if (read_int16(fd,&ape_ctx->padding1) < 0)
224 return -1;
225 if (read_uint32(fd,&ape_ctx->descriptorlength) < 0)
226 return -1;
227 if (read_uint32(fd,&ape_ctx->headerlength) < 0)
228 return -1;
229 if (read_uint32(fd,&ape_ctx->seektablelength) < 0)
230 return -1;
231 if (read_uint32(fd,&ape_ctx->wavheaderlength) < 0)
232 return -1;
233 if (read_uint32(fd,&ape_ctx->audiodatalength) < 0)
234 return -1;
235 if (read_uint32(fd,&ape_ctx->audiodatalength_high) < 0)
236 return -1;
237 if (read_uint32(fd,&ape_ctx->wavtaillength) < 0)
238 return -1;
239 if (read(fd,&ape_ctx->md5,16) != 16)
240 return -1;
241
242 /* Skip any unknown bytes at the end of the descriptor. This is for future
243 compatibility */
244 if (ape_ctx->descriptorlength > 52)
245 lseek(fd,ape_ctx->descriptorlength - 52, SEEK_CUR);
246
247 /* Read header data */
248 if (read_uint16(fd,&ape_ctx->compressiontype) < 0)
249 return -1;
250 if (read_uint16(fd,&ape_ctx->formatflags) < 0)
251 return -1;
252 if (read_uint32(fd,&ape_ctx->blocksperframe) < 0)
253 return -1;
254 if (read_uint32(fd,&ape_ctx->finalframeblocks) < 0)
255 return -1;
256 if (read_uint32(fd,&ape_ctx->totalframes) < 0)
257 return -1;
258 if (read_uint16(fd,&ape_ctx->bps) < 0)
259 return -1;
260 if (read_uint16(fd,&ape_ctx->channels) < 0)
261 return -1;
262 if (read_uint32(fd,&ape_ctx->samplerate) < 0)
263 return -1;
264 } else {
265 ape_ctx->descriptorlength = 0;
266 ape_ctx->headerlength = 32;
267
268 if (read_uint16(fd,&ape_ctx->compressiontype) < 0)
269 return -1;
270 if (read_uint16(fd,&ape_ctx->formatflags) < 0)
271 return -1;
272 if (read_uint16(fd,&ape_ctx->channels) < 0)
273 return -1;
274 if (read_uint32(fd,&ape_ctx->samplerate) < 0)
275 return -1;
276 if (read_uint32(fd,&ape_ctx->wavheaderlength) < 0)
277 return -1;
278 if (read_uint32(fd,&ape_ctx->wavtaillength) < 0)
279 return -1;
280 if (read_uint32(fd,&ape_ctx->totalframes) < 0)
281 return -1;
282 if (read_uint32(fd,&ape_ctx->finalframeblocks) < 0)
283 return -1;
284
285 if (ape_ctx->formatflags & MAC_FORMAT_FLAG_HAS_PEAK_LEVEL)
286 {
287 lseek(fd, 4, SEEK_CUR); /* Skip the peak level */
288 ape_ctx->headerlength += 4;
289 }
290
291 if (ape_ctx->formatflags & MAC_FORMAT_FLAG_HAS_SEEK_ELEMENTS)
292 {
293 if (read_uint32(fd,&ape_ctx->seektablelength) < 0)
294 return -1;
295 ape_ctx->headerlength += 4;
296 ape_ctx->seektablelength *= sizeof(int32_t);
297 } else {
298 ape_ctx->seektablelength = ape_ctx->totalframes * sizeof(int32_t);
299 }
300
301 if (ape_ctx->formatflags & MAC_FORMAT_FLAG_8_BIT)
302 ape_ctx->bps = 8;
303 else if (ape_ctx->formatflags & MAC_FORMAT_FLAG_24_BIT)
304 ape_ctx->bps = 24;
305 else
306 ape_ctx->bps = 16;
307
308 if (ape_ctx->fileversion >= 3950)
309 ape_ctx->blocksperframe = 73728 * 4;
310 else if ((ape_ctx->fileversion >= 3900) || (ape_ctx->fileversion >= 3800 && ape_ctx->compressiontype >= 4000))
311 ape_ctx->blocksperframe = 73728;
312 else
313 ape_ctx->blocksperframe = 9216;
314
315 /* Skip any stored wav header */
316 if (!(ape_ctx->formatflags & MAC_FORMAT_FLAG_CREATE_WAV_HEADER))
317 {
318 lseek(fd, ape_ctx->wavheaderlength, SEEK_CUR);
319 }
320 }
321
322 ape_ctx->totalsamples = ape_ctx->finalframeblocks;
323 if (ape_ctx->totalframes > 1)
324 ape_ctx->totalsamples += ape_ctx->blocksperframe * (ape_ctx->totalframes-1);
325
326 if (ape_ctx->seektablelength > 0)
327 {
328 ape_ctx->seektable = malloc(ape_ctx->seektablelength);
329 if (ape_ctx->seektable == NULL)
330 return -1;
331 for (i=0; i < ape_ctx->seektablelength / sizeof(uint32_t); i++)
332 {
333 if (read_uint32(fd,&ape_ctx->seektable[i]) < 0)
334 {
335 free(ape_ctx->seektable);
336 return -1;
337 }
338 }
339 }
340
341 ape_ctx->firstframe = ape_ctx->junklength + ape_ctx->descriptorlength +
342 ape_ctx->headerlength + ape_ctx->seektablelength +
343 ape_ctx->wavheaderlength;
344
345 return 0;
346}
347
348void ape_dumpinfo(struct ape_ctx_t* ape_ctx)
349{
350 int i;
351
352 printf("Descriptor Block:\n\n");
353 printf("magic = \"%c%c%c%c\"\n",
354 ape_ctx->magic[0],ape_ctx->magic[1],
355 ape_ctx->magic[2],ape_ctx->magic[3]);
356 printf("fileversion = %d\n",ape_ctx->fileversion);
357 printf("descriptorlength = %d\n",ape_ctx->descriptorlength);
358 printf("headerlength = %d\n",ape_ctx->headerlength);
359 printf("seektablelength = %d\n",ape_ctx->seektablelength);
360 printf("wavheaderlength = %d\n",ape_ctx->wavheaderlength);
361 printf("audiodatalength = %d\n",ape_ctx->audiodatalength);
362 printf("audiodatalength_high = %d\n",ape_ctx->audiodatalength_high);
363 printf("wavtaillength = %d\n",ape_ctx->wavtaillength);
364 printf("md5 = ");
365 for (i = 0; i < 16; i++)
366 printf("%02x",ape_ctx->md5[i]);
367 printf("\n");
368
369 printf("\nHeader Block:\n\n");
370
371 printf("compressiontype = %d\n",ape_ctx->compressiontype);
372 printf("formatflags = %d\n",ape_ctx->formatflags);
373 printf("blocksperframe = %d\n",ape_ctx->blocksperframe);
374 printf("finalframeblocks = %d\n",ape_ctx->finalframeblocks);
375 printf("totalframes = %d\n",ape_ctx->totalframes);
376 printf("bps = %d\n",ape_ctx->bps);
377 printf("channels = %d\n",ape_ctx->channels);
378 printf("samplerate = %d\n",ape_ctx->samplerate);
379
380 printf("\nSeektable\n\n");
381 if ((ape_ctx->seektablelength / sizeof(uint32_t)) != ape_ctx->totalframes)
382 {
383 printf("No seektable\n");
384 }
385 else
386 {
387 for ( i = 0; i < ape_ctx->seektablelength / sizeof(uint32_t) ; i++)
388 {
389 if (i < ape_ctx->totalframes-1) {
390 printf("%8d %d (%d bytes)\n",i,ape_ctx->seektable[i],ape_ctx->seektable[i+1]-ape_ctx->seektable[i]);
391 } else {
392 printf("%8d %d\n",i,ape_ctx->seektable[i]);
393 }
394 }
395 }
396 printf("\nCalculated information:\n\n");
397 printf("junklength = %d\n",ape_ctx->junklength);
398 printf("firstframe = %d\n",ape_ctx->firstframe);
399 printf("totalsamples = %d\n",ape_ctx->totalsamples);
400}
401
402#endif /* !ROCKBOX */
diff --git a/lib/rbcodec/codecs/demac/libdemac/parser.h b/lib/rbcodec/codecs/demac/libdemac/parser.h
new file mode 100644
index 0000000000..6f07deac12
--- /dev/null
+++ b/lib/rbcodec/codecs/demac/libdemac/parser.h
@@ -0,0 +1,137 @@
1/*
2
3libdemac - A Monkey's Audio decoder
4
5$Id$
6
7Copyright (C) Dave Chapman 2007
8
9This program is free software; you can redistribute it and/or modify
10it under the terms of the GNU General Public License as published by
11the Free Software Foundation; either version 2 of the License, or
12(at your option) any later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
22
23*/
24
25#ifndef _APE_PARSER_H
26#define _APE_PARSER_H
27
28#include <inttypes.h>
29#include "demac_config.h"
30
31/* The earliest and latest file formats supported by this library */
32#define APE_MIN_VERSION 3970
33#define APE_MAX_VERSION 3990
34
/* Format flag bits. Every value is a distinct power of two, so several
   flags can be OR-ed into one word. Presumably these are tested against
   ape_ctx_t.formatflags - TODO confirm against parser.c. */
35#define MAC_FORMAT_FLAG_8_BIT 1 // is 8-bit [OBSOLETE]
36#define MAC_FORMAT_FLAG_CRC 2 // uses the new CRC32 error detection [OBSOLETE]
37#define MAC_FORMAT_FLAG_HAS_PEAK_LEVEL 4 // uint32 nPeakLevel after the header [OBSOLETE]
38#define MAC_FORMAT_FLAG_24_BIT 8 // is 24-bit [OBSOLETE]
39#define MAC_FORMAT_FLAG_HAS_SEEK_ELEMENTS 16 // has the number of seek elements after the peak level
40#define MAC_FORMAT_FLAG_CREATE_WAV_HEADER 32 // create the wave header on decompression (not stored)
41
42
43/* Special frame codes:
44
45 MONO_SILENCE - All PCM samples in frame are zero (mono streams only)
46 LEFT_SILENCE - All PCM samples for left channel in frame are zero (stereo streams)
47 RIGHT_SILENCE - All PCM samples for right channel in frame are zero (stereo streams)
48 PSEUDO_STEREO - Left and Right channels are identical
49
50*/
51
/* Frame code values. LEFT_SILENCE shares the value of MONO_SILENCE, and
   STEREO_SILENCE (3) is LEFT_SILENCE | RIGHT_SILENCE (1 | 2), so the two
   silence codes behave as individual bits; PSEUDO_STEREO is a separate
   bit (4). */
52#define APE_FRAMECODE_MONO_SILENCE 1
53#define APE_FRAMECODE_LEFT_SILENCE 1 /* same as mono */
54#define APE_FRAMECODE_RIGHT_SILENCE 2
55#define APE_FRAMECODE_STEREO_SILENCE 3 /* combined */
56#define APE_FRAMECODE_PSEUDO_STEREO 4
57
58#define PREDICTOR_ORDER 8
59/* Total size of all predictor histories, counted in int32_t elements
   (50 * sizeof(int32_t) = 200 bytes). struct predictor_t reserves this
   many elements on top of PREDICTOR_HISTORY_SIZE in historybuffer[], and
   the assembly predictors copy exactly 200 bytes when they wrap the
   history, so they must be updated if this value changes. */
60#define PREDICTOR_SIZE 50
61
62
63/* NOTE: This struct is used in predictor-arm.S and predictor-cf.S - any
64 updates need to be reflected there. Both files address the members
   through hard-coded byte offsets (buf at 0, YlastA at 4, ...,
   historybuffer at 100), which assumes a 4-byte pointer and no padding. */
65
66struct predictor_t
67{
68 /* Cursor into historybuffer[] below. The assembly predictors advance it
   by one element per decoded sample and, once it reaches
   &historybuffer[PREDICTOR_HISTORY_SIZE], copy the live PREDICTOR_SIZE
   elements back to the start and reset it to &historybuffer[0]. */
69 int32_t* buf;
70
71 int32_t YlastA;
72 int32_t XlastA;
73
74 /* NOTE: The order of the next four fields is important for
75 predictor-arm.S, which loads them as adjacent pairs:
   (YfilterB, XfilterA) and (XfilterB, YfilterA). */
76 int32_t YfilterB;
77 int32_t XfilterA;
78 int32_t XfilterB;
79 int32_t YfilterA;
80
81 /* Adaption co-efficients */
82 int32_t YcoeffsA[4];
83 int32_t XcoeffsA[4];
84 int32_t YcoeffsB[5];
85 int32_t XcoeffsB[5];
   /* Backing store for buf: PREDICTOR_HISTORY_SIZE sliding positions plus
      the PREDICTOR_SIZE elements that are live at any one time. */
86 int32_t historybuffer[PREDICTOR_HISTORY_SIZE + PREDICTOR_SIZE];
87};
88
/* All state kept for one APE stream: values derived while parsing, the raw
   descriptor and header fields, the seek table and the running decoder
   state (including the embedded predictor). */
89struct ape_ctx_t
90{
91 /* Derived fields */
92 uint32_t junklength;
93 uint32_t firstframe;
94 uint32_t totalsamples;
95
96 /* Info from Descriptor Block */
97 char magic[4];
98 int16_t fileversion;
99 int16_t padding1;
100 uint32_t descriptorlength;
101 uint32_t headerlength;
102 uint32_t seektablelength; /* in bytes: the dump code in parser.c divides it by sizeof(uint32_t) to get the entry count */
103 uint32_t wavheaderlength;
104 uint32_t audiodatalength;
105 uint32_t audiodatalength_high; /* presumably the upper 32 bits of the audio data length - TODO confirm */
106 uint32_t wavtaillength;
107 uint8_t md5[16];
108
109 /* Info from Header Block */
110 uint16_t compressiontype;
111 uint16_t formatflags;
112 uint32_t blocksperframe;
113 uint32_t finalframeblocks;
114 uint32_t totalframes;
115 uint16_t bps;
116 uint16_t channels;
117 uint32_t samplerate;
118
119 /* Seektable */
120 uint32_t* seektable; /* Seektable buffer */
121 uint32_t maxseekpoints; /* Max seekpoints we can store (size of seektable buffer) */
122 uint32_t numseekpoints; /* Number of seekpoints */
123 int seektablefilepos; /* Location in .ape file of seektable */
124
125 /* Decoder state */
126 uint32_t CRC;
127 int frameflags;
128 int currentframeblocks;
129 int blocksdecoded;
130 struct predictor_t predictor; /* embedded by value, so this struct is at least sizeof(struct predictor_t) large */
131};
132
/* Presumably fills *ape_ctx from the APE header of the file open on
   descriptor fd. NOTE(review): the return-value convention is not visible
   from this header - check parser.c. */
133int ape_parseheader(int fd, struct ape_ctx_t* ape_ctx);
/* Presumably the same as ape_parseheader(), but reading the header from the
   memory buffer buf instead of a file - TODO confirm the required buffer
   length in parser.c. */
134int ape_parseheaderbuf(unsigned char* buf, struct ape_ctx_t* ape_ctx);
/* Presumably prints the fields of *ape_ctx with printf for debugging.
   NOTE(review): parser.c closes an "#endif !ROCKBOX" block right after its
   dump code, so the definition may not exist in ROCKBOX builds - confirm. */
135void ape_dumpinfo(struct ape_ctx_t* ape_ctx);
136
137#endif
diff --git a/lib/rbcodec/codecs/demac/libdemac/predictor-arm.S b/lib/rbcodec/codecs/demac/libdemac/predictor-arm.S
new file mode 100644
index 0000000000..92a78ed9b4
--- /dev/null
+++ b/lib/rbcodec/codecs/demac/libdemac/predictor-arm.S
@@ -0,0 +1,702 @@
1/*
2
3libdemac - A Monkey's Audio decoder
4
5$Id$
6
7Copyright (C) Dave Chapman 2007
8
9This program is free software; you can redistribute it and/or modify
10it under the terms of the GNU General Public License as published by
11the Free Software Foundation; either version 2 of the License, or
12(at your option) any later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
22
23*/
24#include "demac_config.h"
25
26 .section ICODE_SECTION_DEMAC_ARM,"ax",%progbits
27
28 .align 2
29
30/* NOTE: The following need to be kept in sync with parser.h */
31
/* Byte offsets relative to p->buf (r14). The comments in the code below
   name the same positions as int32_t element indices, i.e. a byte offset
   of YDELAYA-12 is written as p->buf[YDELAYA-3].
   NOTE(review): these values presumably mirror the element indices used by
   the C predictor, multiplied by sizeof(int32_t) - confirm against the .c
   file when changing them. */
32#define YDELAYA 200
33#define YDELAYB 168
34#define XDELAYA 136
35#define XDELAYB 104
36#define YADAPTCOEFFSA 72
37#define XADAPTCOEFFSA 56
38#define YADAPTCOEFFSB 40
39#define XADAPTCOEFFSB 20
40
41/* struct predictor_t members: byte offsets from the start of the struct,
   assuming a 4-byte pointer for buf and no padding. */
42#define buf 0 /* int32_t* buf */
43
44#define YlastA 4 /* int32_t YlastA; */
45#define XlastA 8 /* int32_t XlastA; */
46
47#define YfilterB 12 /* int32_t YfilterB; */
48#define XfilterA 16 /* int32_t XfilterA; */
49
50#define XfilterB 20 /* int32_t XfilterB; */
51#define YfilterA 24 /* int32_t YfilterA; */
52
53#define YcoeffsA 28 /* int32_t YcoeffsA[4]; */
54#define XcoeffsA 44 /* int32_t XcoeffsA[4]; */
55#define YcoeffsB 60 /* int32_t YcoeffsB[5]; */
56#define XcoeffsB 80 /* int32_t XcoeffsB[5]; */
57
58#define historybuffer 100 /* int32_t historybuffer[] */
59
60@ Macro for loading 2 registers, for various ARM versions.
61@ Registers must start with an even register, and must be consecutive.
@
@ Effect: reg1 := [base + offset], reg2 := [base + offset + 4].
@ base itself is left unmodified.
@ The even/consecutive restriction comes from the ARM_ARCH >= 6 variant,
@ where ldrd only names the first register and reg2 is implied.
62
63.macro LDR2OFS reg1, reg2, base, offset
64#if ARM_ARCH >= 6
65 ldrd \reg1, [\base, \offset]
66#else /* ARM_ARCH < 6 */
67#ifdef CPU_ARM7TDMI
@ reg1 doubles as the temporary address register before being loaded
68 add \reg1, \base, \offset
69 ldmia \reg1, {\reg1, \reg2}
70#else /* ARM9 (v4 and v5) is faster this way */
71 ldr \reg1, [\base, \offset]
72 ldr \reg2, [\base, \offset+4]
73#endif
74#endif /* ARM_ARCH */
75.endm
76
77@ Macro for storing 2 registers, for various ARM versions.
78@ Registers must start with an even register, and must be consecutive.
@
@ Effect: [base + offset] := reg1, [base + offset + 4] := reg2.
@ Only str/strd are used, so the condition flags are left untouched; the
@ predictors below rely on that when they test flags after this macro.
@ The even/consecutive restriction comes from the ARM_ARCH >= 6 variant,
@ where strd only names the first register and reg2 is implied.
79
80.macro STR2OFS reg1, reg2, base, offset
81#if ARM_ARCH >= 6
82 strd \reg1, [\base, \offset]
83#else
84 str \reg1, [\base, \offset]
85 str \reg2, [\base, \offset+4]
86#endif
87.endm
88
89 .global predictor_decode_stereo
90 .type predictor_decode_stereo,%function
91
92@ Register usage:
93@
94@ r0-r11 - scratch
95@ r12 - struct predictor_t* p
96@ r14 - int32_t* p->buf
97
98@ void predictor_decode_stereo(struct predictor_t* p,
99@ int32_t* decoded0,
100@ int32_t* decoded1,
101@ int count)
@
@ Runs the Y predictor over decoded0[] and the X predictor over decoded1[],
@ one sample of each per loop iteration. Every input word is read and then
@ overwritten in place with the predictor output. Filter B of each channel
@ is fed from the other channel's filter A value (Y uses p->XfilterA,
@ X uses p->YfilterA).
@
@ count must be at least 1: the counter is only tested after a sample pair
@ has been processed.
@
@ The three argument registers are pushed together with the callee-saved
@ ones, so decoded0, decoded1 and count live on the stack for the whole
@ function and r1-r3 are free as scratch.
102
103predictor_decode_stereo:
104 stmdb sp!, {r1-r11, lr}
105
106 @ r1 (decoded0) is [sp]
107 @ r2 (decoded1) is [sp, #4]
108 @ r3 (count) is [sp, #8]
109
110 mov r12, r0 @ r12 := p
111 ldr r14, [r0] @ r14 := p->buf
112
113loop:
114
115@@@@@@@@@@@@@@@@@@@@@@@@@@@ PREDICTOR Y
116
117@ Predictor Y, Filter A
118
119 ldr r11, [r12, #YlastA] @ r11 := p->YlastA
120
121 add r2, r14, #YDELAYA-12 @ r2 := &p->buf[YDELAYA-3]
122 ldmia r2, {r2, r3, r10} @ r2 := p->buf[YDELAYA-3]
123 @ r3 := p->buf[YDELAYA-2]
124 @ r10 := p->buf[YDELAYA-1]
125
126 add r6, r12, #YcoeffsA
127 ldmia r6, {r6 - r9} @ r6 := p->YcoeffsA[0]
128 @ r7 := p->YcoeffsA[1]
129 @ r8 := p->YcoeffsA[2]
130 @ r9 := p->YcoeffsA[3]
131
132 subs r10, r11, r10 @ r10 := r11 - r10
133
134 STR2OFS r10, r11, r14, #YDELAYA-4
135 @ p->buf[YDELAYA-1] = r10
136 @ p->buf[YDELAYA] = r11
137
138 mul r0, r11, r6 @ r0 := p->buf[YDELAYA] * p->YcoeffsA[0]
139 mla r0, r10, r7, r0 @ r0 += p->buf[YDELAYA-1] * p->YcoeffsA[1]
140 mla r0, r3, r8, r0 @ r0 += p->buf[YDELAYA-2] * p->YcoeffsA[2]
141 mla r0, r2, r9, r0 @ r0 += p->buf[YDELAYA-3] * p->YcoeffsA[3]
142
143 @ flags were set above, in the subs instruction
 @ (the stores and mul/mla in between do not touch them)
 @ result: -1 when the difference is > 0, +1 when it is < 0,
 @ left at 0 when it is 0
144 mvngt r10, #0
145 movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro)
146
147 cmp r11, #0
148 mvngt r11, #0
149 movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro)
150
151 STR2OFS r10, r11, r14, #YADAPTCOEFFSA-4
152 @ p->buf[YADAPTCOEFFSA-1] := r10
153 @ p->buf[YADAPTCOEFFSA] := r11
154
155 @ NOTE: r0 now contains predictionA - don't overwrite.
156
157@ Predictor Y, Filter B
158
159 LDR2OFS r6, r7, r12, #YfilterB @ r6 := p->YfilterB
160 @ r7 := p->XfilterA
161
162 add r2, r14, #YDELAYB-16 @ r2 := &p->buf[YDELAYB-4]
163 ldmia r2, {r2 - r4, r10} @ r2 := p->buf[YDELAYB-4]
164 @ r3 := p->buf[YDELAYB-3]
165 @ r4 := p->buf[YDELAYB-2]
166 @ r10 := p->buf[YDELAYB-1]
167
168 rsb r6, r6, r6, lsl #5 @ r6 := r6 * 32 - r6 ( == r6*31)
169 sub r11, r7, r6, asr #5 @ r11 (p->buf[YDELAYB]) := r7 - (r6 >> 5)
170
171 str r7, [r12, #YfilterB] @ p->YfilterB := r7 (p->XfilterA)
172
173 add r5, r12, #YcoeffsB
174 ldmia r5, {r5 - r9} @ r5 := p->YcoeffsB[0]
175 @ r6 := p->YcoeffsB[1]
176 @ r7 := p->YcoeffsB[2]
177 @ r8 := p->YcoeffsB[3]
178 @ r9 := p->YcoeffsB[4]
179
180 subs r10, r11, r10 @ r10 := r11 - r10
181
182 STR2OFS r10, r11, r14, #YDELAYB-4
183 @ p->buf[YDELAYB-1] = r10
184 @ p->buf[YDELAYB] = r11
185
186 mul r1, r11, r5 @ r1 := p->buf[YDELAYB] * p->YcoeffsB[0]
187 mla r1, r10, r6, r1 @ r1 += p->buf[YDELAYB-1] * p->YcoeffsB[1]
188 mla r1, r4, r7, r1 @ r1 += p->buf[YDELAYB-2] * p->YcoeffsB[2]
189 mla r1, r3, r8, r1 @ r1 += p->buf[YDELAYB-3] * p->YcoeffsB[3]
190 mla r1, r2, r9, r1 @ r1 += p->buf[YDELAYB-4] * p->YcoeffsB[4]
191
192 @ flags were set above, in the subs instruction
193 mvngt r10, #0
194 movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro)
195
196 cmp r11, #0
197 mvngt r11, #0
198 movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro)
199
200 STR2OFS r10, r11, r14, #YADAPTCOEFFSB-4
201 @ p->buf[YADAPTCOEFFSB-1] := r10
202 @ p->buf[YADAPTCOEFFSB] := r11
203
204 @ r0 still contains predictionA
205 @ r1 contains predictionB
206
207 @ Finish Predictor Y
208
209 ldr r2, [sp] @ r2 := decoded0
210 add r0, r0, r1, asr #1 @ r0 := r0 + (r1 >> 1)
211 ldr r4, [r12, #YfilterA] @ r4 := p->YfilterA
212 ldr r3, [r2] @ r3 := *decoded0
213 rsb r4, r4, r4, lsl #5 @ r4 := r4 * 32 - r4 ( == r4*31)
214 add r1, r3, r0, asr #10 @ r1 := r3 + (r0 >> 10)
215 str r1, [r12, #YlastA] @ p->YlastA := r1
216 add r1, r1, r4, asr #5 @ r1 := r1 + (r4 >> 5)
217 str r1, [r12, #YfilterA] @ p->YfilterA := r1
218
219 @ r1 contains p->YfilterA
220 @ r2 contains decoded0
221 @ r3 contains *decoded0
222
223 @ r5, r6, r7, r8, r9 contain p->YcoeffsB[0..4]
224 @ r10, r11 contain p->buf[YADAPTCOEFFSB-1] and p->buf[YADAPTCOEFFSB]
225
226 str r1, [r2], #4 @ *(decoded0++) := r1 (p->YfilterA)
227 str r2, [sp] @ save decoded0
 @ The sign of the original input word selects the adaption
 @ direction; an input of zero leaves all coefficients unchanged.
228 cmp r3, #0
229 beq 3f
230
231 add r2, r14, #YADAPTCOEFFSB-16
232 ldmia r2, {r2 - r4} @ r2 := p->buf[YADAPTCOEFFSB-4]
233 @ r3 := p->buf[YADAPTCOEFFSB-3]
234 @ r4 := p->buf[YADAPTCOEFFSB-2]
 @ flags from the cmp above are still live here (add/ldmia
 @ do not set them)
235 blt 1f
236
237 @ *decoded0 > 0
238
239 sub r5, r5, r11 @ r5 := p->YcoeffsB[0] - p->buf[YADAPTCOEFFSB]
240 sub r6, r6, r10 @ r6 := p->YcoeffsB[1] - p->buf[YADAPTCOEFFSB-1]
241 sub r9, r9, r2 @ r9 := p->YcoeffsB[4] - p->buf[YADAPTCOEFFSB-4]
242 sub r8, r8, r3 @ r8 := p->YcoeffsB[3] - p->buf[YADAPTCOEFFSB-3]
243 sub r7, r7, r4 @ r7 := p->YcoeffsB[2] - p->buf[YADAPTCOEFFSB-2]
244
245 add r0, r12, #YcoeffsB
246 stmia r0, {r5 - r9} @ Save p->YcoeffsB[]
247
248 add r1, r12, #YcoeffsA
249 ldmia r1, {r2 - r5} @ r2 := p->YcoeffsA[0]
250 @ r3 := p->YcoeffsA[1]
251 @ r4 := p->YcoeffsA[2]
252 @ r5 := p->YcoeffsA[3]
253
254 add r6, r14, #YADAPTCOEFFSA-12
255 ldmia r6, {r6 - r9} @ r6 := p->buf[YADAPTCOEFFSA-3]
256 @ r7 := p->buf[YADAPTCOEFFSA-2]
257 @ r8 := p->buf[YADAPTCOEFFSA-1]
258 @ r9 := p->buf[YADAPTCOEFFSA]
259
260 sub r5, r5, r6 @ r5 := p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3]
261 sub r4, r4, r7 @ r4 := p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2]
262 sub r3, r3, r8 @ r3 := p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1]
263 sub r2, r2, r9 @ r2 := p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA]
264
265 b 2f
266
267
2681: @ *decoded0 < 0
269
270 add r5, r5, r11 @ r5 := p->YcoeffsB[0] + p->buf[YADAPTCOEFFSB]
271 add r6, r6, r10 @ r6 := p->YcoeffsB[1] + p->buf[YADAPTCOEFFSB-1]
272 add r9, r9, r2 @ r9 := p->YcoeffsB[4] + p->buf[YADAPTCOEFFSB-4]
273 add r8, r8, r3 @ r8 := p->YcoeffsB[3] + p->buf[YADAPTCOEFFSB-3]
274 add r7, r7, r4 @ r7 := p->YcoeffsB[2] + p->buf[YADAPTCOEFFSB-2]
275
276 add r0, r12, #YcoeffsB
277 stmia r0, {r5 - r9} @ Save p->YcoeffsB[]
278
279 add r1, r12, #YcoeffsA
280 ldmia r1, {r2 - r5} @ r2 := p->YcoeffsA[0]
281 @ r3 := p->YcoeffsA[1]
282 @ r4 := p->YcoeffsA[2]
283 @ r5 := p->YcoeffsA[3]
284
285 add r6, r14, #YADAPTCOEFFSA-12
286 ldmia r6, {r6 - r9} @ r6 := p->buf[YADAPTCOEFFSA-3]
287 @ r7 := p->buf[YADAPTCOEFFSA-2]
288 @ r8 := p->buf[YADAPTCOEFFSA-1]
289 @ r9 := p->buf[YADAPTCOEFFSA]
290
291 add r5, r5, r6 @ r5 := p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3]
292 add r4, r4, r7 @ r4 := p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2]
293 add r3, r3, r8 @ r3 := p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1]
294 add r2, r2, r9 @ r2 := p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA]
295
2962:
297 stmia r1, {r2 - r5} @ Save p->YcoeffsA
298
2993:
300
301@@@@@@@@@@@@@@@@@@@@@@@@@@@ PREDICTOR X
302
303@ Predictor X, Filter A
304
305 ldr r11, [r12, #XlastA] @ r11 := p->XlastA
306
307 add r2, r14, #XDELAYA-12 @ r2 := &p->buf[XDELAYA-3]
308 ldmia r2, {r2, r3, r10} @ r2 := p->buf[XDELAYA-3]
309 @ r3 := p->buf[XDELAYA-2]
310 @ r10 := p->buf[XDELAYA-1]
311
312 add r6, r12, #XcoeffsA
313 ldmia r6, {r6 - r9} @ r6 := p->XcoeffsA[0]
314 @ r7 := p->XcoeffsA[1]
315 @ r8 := p->XcoeffsA[2]
316 @ r9 := p->XcoeffsA[3]
317
318 subs r10, r11, r10 @ r10 := r11 - r10
319
320 STR2OFS r10, r11, r14, #XDELAYA-4
321 @ p->buf[XDELAYA-1] = r10
322 @ p->buf[XDELAYA] = r11
323
324 mul r0, r11, r6 @ r0 := p->buf[XDELAYA] * p->XcoeffsA[0]
325 mla r0, r10, r7, r0 @ r0 += p->buf[XDELAYA-1] * p->XcoeffsA[1]
326 mla r0, r3, r8, r0 @ r0 += p->buf[XDELAYA-2] * p->XcoeffsA[2]
327 mla r0, r2, r9, r0 @ r0 += p->buf[XDELAYA-3] * p->XcoeffsA[3]
328
329 @ flags were set above, in the subs instruction
330 mvngt r10, #0
331 movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro)
332
333 cmp r11, #0
334 mvngt r11, #0
335 movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro)
336
337 STR2OFS r10, r11, r14, #XADAPTCOEFFSA-4
338 @ p->buf[XADAPTCOEFFSA-1] := r10
339 @ p->buf[XADAPTCOEFFSA] := r11
340
341 @ NOTE: r0 now contains predictionA - don't overwrite.
342
343@ Predictor X, Filter B
344
345 LDR2OFS r6, r7, r12, #XfilterB @ r6 := p->XfilterB
346 @ r7 := p->YfilterA
347
348 add r2, r14, #XDELAYB-16 @ r2 := &p->buf[XDELAYB-4]
349 ldmia r2, {r2 - r4, r10} @ r2 := p->buf[XDELAYB-4]
350 @ r3 := p->buf[XDELAYB-3]
351 @ r4 := p->buf[XDELAYB-2]
352 @ r10 := p->buf[XDELAYB-1]
353
354 rsb r6, r6, r6, lsl #5 @ r6 := r6 * 32 - r6 ( == r6*31)
355 sub r11, r7, r6, asr #5 @ r11 (p->buf[XDELAYB]) := r7 - (r6 >> 5)
356
357 str r7, [r12, #XfilterB] @ p->XfilterB := r7 (p->YfilterA)
358
359 add r5, r12, #XcoeffsB
360 ldmia r5, {r5 - r9} @ r5 := p->XcoeffsB[0]
361 @ r6 := p->XcoeffsB[1]
362 @ r7 := p->XcoeffsB[2]
363 @ r8 := p->XcoeffsB[3]
364 @ r9 := p->XcoeffsB[4]
365
366 subs r10, r11, r10 @ r10 := r11 - r10
367
368 STR2OFS r10, r11, r14, #XDELAYB-4
369 @ p->buf[XDELAYB-1] = r10
370 @ p->buf[XDELAYB] = r11
371
372 mul r1, r11, r5 @ r1 := p->buf[XDELAYB] * p->XcoeffsB[0]
373 mla r1, r10, r6, r1 @ r1 += p->buf[XDELAYB-1] * p->XcoeffsB[1]
374 mla r1, r4, r7, r1 @ r1 += p->buf[XDELAYB-2] * p->XcoeffsB[2]
375 mla r1, r3, r8, r1 @ r1 += p->buf[XDELAYB-3] * p->XcoeffsB[3]
376 mla r1, r2, r9, r1 @ r1 += p->buf[XDELAYB-4] * p->XcoeffsB[4]
377
378 @ flags were set above, in the subs instruction
379 mvngt r10, #0
380 movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro)
381
382 cmp r11, #0
383 mvngt r11, #0
384 movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro)
385
386 STR2OFS r10, r11, r14, #XADAPTCOEFFSB-4
387 @ p->buf[XADAPTCOEFFSB-1] := r10
388 @ p->buf[XADAPTCOEFFSB] := r11
389
390 @ r0 still contains predictionA
391 @ r1 contains predictionB
392
393 @ Finish Predictor X
394
395 ldr r2, [sp, #4] @ r2 := decoded1
396 add r0, r0, r1, asr #1 @ r0 := r0 + (r1 >> 1)
397 ldr r4, [r12, #XfilterA] @ r4 := p->XfilterA
398 ldr r3, [r2] @ r3 := *decoded1
399 rsb r4, r4, r4, lsl #5 @ r4 := r4 * 32 - r4 ( == r4*31)
400 add r1, r3, r0, asr #10 @ r1 := r3 + (r0 >> 10)
401 str r1, [r12, #XlastA] @ p->XlastA := r1
402 add r1, r1, r4, asr #5 @ r1 := r1 + (r4 >> 5)
403 str r1, [r12, #XfilterA] @ p->XfilterA := r1
404
405 @ r1 contains p->XfilterA
406 @ r2 contains decoded1
407 @ r3 contains *decoded1
408
409 @ r5, r6, r7, r8, r9 contain p->XcoeffsB[0..4]
410 @ r10, r11 contain p->buf[XADAPTCOEFFSB-1] and p->buf[XADAPTCOEFFSB]
411
412 str r1, [r2], #4 @ *(decoded1++) := r1 (p->XfilterA)
413 str r2, [sp, #4] @ save decoded1
414 cmp r3, #0
415 beq 3f
416
417 add r2, r14, #XADAPTCOEFFSB-16
418 ldmia r2, {r2 - r4} @ r2 := p->buf[XADAPTCOEFFSB-4]
419 @ r3 := p->buf[XADAPTCOEFFSB-3]
420 @ r4 := p->buf[XADAPTCOEFFSB-2]
421 blt 1f
422
423 @ *decoded1 > 0
424
425 sub r5, r5, r11 @ r5 := p->XcoeffsB[0] - p->buf[XADAPTCOEFFSB]
426 sub r6, r6, r10 @ r6 := p->XcoeffsB[1] - p->buf[XADAPTCOEFFSB-1]
427 sub r9, r9, r2 @ r9 := p->XcoeffsB[4] - p->buf[XADAPTCOEFFSB-4]
428 sub r8, r8, r3 @ r8 := p->XcoeffsB[3] - p->buf[XADAPTCOEFFSB-3]
429 sub r7, r7, r4 @ r7 := p->XcoeffsB[2] - p->buf[XADAPTCOEFFSB-2]
430
431 add r0, r12, #XcoeffsB
432 stmia r0, {r5 - r9} @ Save p->XcoeffsB[]
433
434 add r1, r12, #XcoeffsA
435 ldmia r1, {r2 - r5} @ r2 := p->XcoeffsA[0]
436 @ r3 := p->XcoeffsA[1]
437 @ r4 := p->XcoeffsA[2]
438 @ r5 := p->XcoeffsA[3]
439
440 add r6, r14, #XADAPTCOEFFSA-12
441 ldmia r6, {r6 - r9} @ r6 := p->buf[XADAPTCOEFFSA-3]
442 @ r7 := p->buf[XADAPTCOEFFSA-2]
443 @ r8 := p->buf[XADAPTCOEFFSA-1]
444 @ r9 := p->buf[XADAPTCOEFFSA]
445
446 sub r5, r5, r6 @ r5 := p->XcoeffsA[3] - p->buf[XADAPTCOEFFSA-3]
447 sub r4, r4, r7 @ r4 := p->XcoeffsA[2] - p->buf[XADAPTCOEFFSA-2]
448 sub r3, r3, r8 @ r3 := p->XcoeffsA[1] - p->buf[XADAPTCOEFFSA-1]
449 sub r2, r2, r9 @ r2 := p->XcoeffsA[0] - p->buf[XADAPTCOEFFSA]
450
451 b 2f
452
453
4541: @ *decoded1 < 0
455
456 add r5, r5, r11 @ r5 := p->XcoeffsB[0] + p->buf[XADAPTCOEFFSB]
457 add r6, r6, r10 @ r6 := p->XcoeffsB[1] + p->buf[XADAPTCOEFFSB-1]
458 add r9, r9, r2 @ r9 := p->XcoeffsB[4] + p->buf[XADAPTCOEFFSB-4]
459 add r8, r8, r3 @ r8 := p->XcoeffsB[3] + p->buf[XADAPTCOEFFSB-3]
460 add r7, r7, r4 @ r7 := p->XcoeffsB[2] + p->buf[XADAPTCOEFFSB-2]
461
462 add r0, r12, #XcoeffsB
463 stmia r0, {r5 - r9} @ Save p->XcoeffsB[]
464
465 add r1, r12, #XcoeffsA
466 ldmia r1, {r2 - r5} @ r2 := p->XcoeffsA[0]
467 @ r3 := p->XcoeffsA[1]
468 @ r4 := p->XcoeffsA[2]
469 @ r5 := p->XcoeffsA[3]
470
471 add r6, r14, #XADAPTCOEFFSA-12
472 ldmia r6, {r6 - r9} @ r6 := p->buf[XADAPTCOEFFSA-3]
473 @ r7 := p->buf[XADAPTCOEFFSA-2]
474 @ r8 := p->buf[XADAPTCOEFFSA-1]
475 @ r9 := p->buf[XADAPTCOEFFSA]
476
477 add r5, r5, r6 @ r5 := p->XcoeffsA[3] + p->buf[XADAPTCOEFFSA-3]
478 add r4, r4, r7 @ r4 := p->XcoeffsA[2] + p->buf[XADAPTCOEFFSA-2]
479 add r3, r3, r8 @ r3 := p->XcoeffsA[1] + p->buf[XADAPTCOEFFSA-1]
480 add r2, r2, r9 @ r2 := p->XcoeffsA[0] + p->buf[XADAPTCOEFFSA]
481
4822:
483 stmia r1, {r2 - r5} @ Save p->XcoeffsA
484
4853:
486
487@@@@@@@@@@@@@@@@@@@@@@@@@@@ COMMON
488
489 add r14, r14, #4 @ p->buf++
490
491 add r11, r12, #historybuffer @ r11 := &p->historybuffer[0]
492
493 sub r10, r14, #PREDICTOR_HISTORY_SIZE*4
494 @ r10 := p->buf - PREDICTOR_HISTORY_SIZE
495
496 ldr r0, [sp, #8]
 @ r0 := remaining count (loaded before the branch so that
 @ both paths can decrement it)
497 cmp r10, r11
498 beq move_hist @ The history buffer is full, we need to do a memmove
499
500 @ Check loop count
501 subs r0, r0, #1
502 strne r0, [sp, #8]
503 bne loop
504
505done:
506 str r14, [r12] @ Save value of p->buf
507 add sp, sp, #12 @ Don't bother restoring r1-r3
 @ ldmpc is not defined in this file; presumably a Rockbox
 @ macro that pops the listed registers and returns - TODO
 @ confirm. The non-Rockbox variant pops r4-r11 and pc.
508#ifdef ROCKBOX
509 ldmpc regs=r4-r11
510#else
511 ldmia sp!, {r4 - r11, pc}
512#endif
513
514move_hist:
515 @ dest = r11 (p->historybuffer)
516 @ src = r14 (p->buf)
517 @ n = 200 bytes (5 x 10 words = 50 int32_t)
518
519 ldmia r14!, {r0-r9} @ 40 bytes
520 stmia r11!, {r0-r9}
521 ldmia r14!, {r0-r9} @ 40 bytes
522 stmia r11!, {r0-r9}
523 ldmia r14!, {r0-r9} @ 40 bytes
524 stmia r11!, {r0-r9}
525 ldmia r14!, {r0-r9} @ 40 bytes
526 stmia r11!, {r0-r9}
527 ldmia r14!, {r0-r9} @ 40 bytes
528 stmia r11!, {r0-r9}
529
 @ r0 was clobbered by the copy, so the count is reloaded
530 ldr r0, [sp, #8]
531 add r14, r12, #historybuffer @ p->buf = &p->historybuffer[0]
532
533 @ Check loop count
534 subs r0, r0, #1
535 strne r0, [sp, #8]
536 bne loop
537
538 b done
539 .size predictor_decode_stereo, .-predictor_decode_stereo
540
541 .global predictor_decode_mono
542 .type predictor_decode_mono,%function
543
544@ Register usage:
545@
546@ r0-r11 - scratch
547@ r12 - struct predictor_t* p
548@ r14 - int32_t* p->buf
549
550@ void predictor_decode_mono(struct predictor_t* p,
551@ int32_t* decoded0,
552@ int count)
@
@ Single-channel variant: only filter A of the Y predictor state is used
@ (p->YlastA, p->YfilterA, p->YcoeffsA[]). Every word of decoded0[] is
@ read and then overwritten in place with the predictor output.
@
@ count must be at least 1: the counter is only tested after a sample has
@ been processed.
@
@ decoded0 and count are pushed together with the callee-saved registers
@ and live on the stack for the whole function.
553
554predictor_decode_mono:
555 stmdb sp!, {r1, r2, r4-r11, lr}
556
557 @ r1 (decoded0) is [sp]
558 @ r2 (count) is [sp, #4]
559
560 mov r12, r0 @ r12 := p
561 ldr r14, [r0] @ r14 := p->buf
562
563loopm:
564
565@@@@@@@@@@@@@@@@@@@@@@@@@@@ PREDICTOR
566
567 ldr r11, [r12, #YlastA] @ r11 := p->YlastA
568
569 add r2, r14, #YDELAYA-12 @ r2 := &p->buf[YDELAYA-3]
570 ldmia r2, {r2, r3, r10} @ r2 := p->buf[YDELAYA-3]
571 @ r3 := p->buf[YDELAYA-2]
572 @ r10 := p->buf[YDELAYA-1]
573
 @ r5 keeps the coefficient address until the stmia at 2: below
574 add r5, r12, #YcoeffsA @ r5 := &p->YcoeffsA[0]
575 ldmia r5, {r6 - r9} @ r6 := p->YcoeffsA[0]
576 @ r7 := p->YcoeffsA[1]
577 @ r8 := p->YcoeffsA[2]
578 @ r9 := p->YcoeffsA[3]
579
580 subs r10, r11, r10 @ r10 := r11 - r10
581
582 STR2OFS r10, r11, r14, #YDELAYA-4
583 @ p->buf[YDELAYA-1] = r10
584 @ p->buf[YDELAYA] = r11
585
586 mul r0, r11, r6 @ r0 := p->buf[YDELAYA] * p->YcoeffsA[0]
587 mla r0, r10, r7, r0 @ r0 += p->buf[YDELAYA-1] * p->YcoeffsA[1]
588 mla r0, r3, r8, r0 @ r0 += p->buf[YDELAYA-2] * p->YcoeffsA[2]
589 mla r0, r2, r9, r0 @ r0 += p->buf[YDELAYA-3] * p->YcoeffsA[3]
590
591 @ flags were set above, in the subs instruction
 @ (the stores and mul/mla in between do not touch them)
 @ result: -1 when the difference is > 0, +1 when it is < 0,
 @ left at 0 when it is 0
592 mvngt r10, #0
593 movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro)
594
595 cmp r11, #0
596 mvngt r11, #0
597 movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro)
598
599 STR2OFS r10, r11, r14, #YADAPTCOEFFSA-4
600 @ p->buf[YADAPTCOEFFSA-1] := r10
601 @ p->buf[YADAPTCOEFFSA] := r11
602
603 ldr r2, [sp] @ r2 := decoded0
604 ldr r4, [r12, #YfilterA] @ r4 := p->YfilterA
605 ldr r3, [r2] @ r3 := *decoded0
606 rsb r4, r4, r4, lsl #5 @ r4 := r4 * 32 - r4 ( == r4*31)
607 add r1, r3, r0, asr #10 @ r1 := r3 + (r0 >> 10)
608 str r1, [r12, #YlastA] @ p->YlastA := r1
609 add r1, r1, r4, asr #5 @ r1 := r1 + (r4 >> 5)
610 str r1, [r12, #YfilterA] @ p->YfilterA := r1
611
612 @ r1 contains p->YfilterA
613 @ r2 contains decoded0
614 @ r3 contains *decoded0
615
616 @ r6, r7, r8, r9 contain p->YcoeffsA[0..3]
617 @ r10, r11 contain p->buf[YADAPTCOEFFSA-1] and p->buf[YADAPTCOEFFSA]
618
619 str r1, [r2], #4 @ *(decoded0++) := r1 (p->YfilterA)
620 str r2, [sp] @ save decoded0
 @ The sign of the original input word selects the adaption
 @ direction; an input of zero leaves the coefficients unchanged.
621 cmp r3, #0
622 beq 3f
623
624 LDR2OFS r2, r3, r14, #YADAPTCOEFFSA-12
625 @ r2 := p->buf[YADAPTCOEFFSA-3]
626 @ r3 := p->buf[YADAPTCOEFFSA-2]
 @ flags from the cmp above are still live here (LDR2OFS
 @ does not set them)
627 blt 1f
628
629 @ *decoded0 > 0
630
631 sub r6, r6, r11 @ r6 := p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA]
632 sub r7, r7, r10 @ r7 := p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1]
633 sub r9, r9, r2 @ r9 := p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3]
634 sub r8, r8, r3 @ r8 := p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2]
635
636 b 2f
637
6381: @ *decoded0 < 0
639
640 add r6, r6, r11 @ r6 := p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA]
641 add r7, r7, r10 @ r7 := p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1]
642 add r9, r9, r2 @ r9 := p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3]
643 add r8, r8, r3 @ r8 := p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2]
644
6452:
646 stmia r5, {r6 - r9} @ Save p->YcoeffsA
647
6483:
649
650@@@@@@@@@@@@@@@@@@@@@@@@@@@ COMMON
651
652 add r14, r14, #4 @ p->buf++
653
654 add r11, r12, #historybuffer @ r11 := &p->historybuffer[0]
655
656 sub r10, r14, #PREDICTOR_HISTORY_SIZE*4
657 @ r10 := p->buf - PREDICTOR_HISTORY_SIZE
658
659 ldr r0, [sp, #4]
 @ r0 := remaining count (loaded before the branch so that
 @ both paths can decrement it)
660 cmp r10, r11
661 beq move_histm @ The history buffer is full, we need to do a memmove
662
663 @ Check loop count
664 subs r0, r0, #1
665 strne r0, [sp, #4]
666 bne loopm
667
668donem:
669 str r14, [r12] @ Save value of p->buf
670 add sp, sp, #8 @ Don't bother restoring r1, r2
 @ ldmpc is not defined in this file; presumably a Rockbox
 @ macro that pops the listed registers and returns - TODO
 @ confirm. The non-Rockbox variant pops r4-r11 and pc.
671#ifdef ROCKBOX
672 ldmpc regs=r4-r11
673#else
674 ldmia sp!, {r4 - r11, pc}
675#endif
676
677move_histm:
678 @ dest = r11 (p->historybuffer)
679 @ src = r14 (p->buf)
680 @ n = 200 bytes (5 x 10 words = 50 int32_t)
681
682 ldmia r14!, {r0-r9} @ 40 bytes
683 stmia r11!, {r0-r9}
684 ldmia r14!, {r0-r9} @ 40 bytes
685 stmia r11!, {r0-r9}
686 ldmia r14!, {r0-r9} @ 40 bytes
687 stmia r11!, {r0-r9}
688 ldmia r14!, {r0-r9} @ 40 bytes
689 stmia r11!, {r0-r9}
690 ldmia r14!, {r0-r9} @ 40 bytes
691 stmia r11!, {r0-r9}
692
 @ r0 was clobbered by the copy, so the count is reloaded
693 ldr r0, [sp, #4]
694 add r14, r12, #historybuffer @ p->buf = &p->historybuffer[0]
695
696 @ Check loop count
697 subs r0, r0, #1
698 strne r0, [sp, #4]
699 bne loopm
700
701 b donem
702 .size predictor_decode_mono, .-predictor_decode_mono
diff --git a/lib/rbcodec/codecs/demac/libdemac/predictor-cf.S b/lib/rbcodec/codecs/demac/libdemac/predictor-cf.S
new file mode 100644
index 0000000000..fc1d901a59
--- /dev/null
+++ b/lib/rbcodec/codecs/demac/libdemac/predictor-cf.S
@@ -0,0 +1,660 @@
1/*
2
3libdemac - A Monkey's Audio decoder
4
5$Id$
6
7Copyright (C) Dave Chapman 2007
8
9Coldfire predictor copyright (C) 2007 Jens Arnold
10
11This program is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2 of the License, or
14(at your option) any later version.
15
16This program is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
20
21You should have received a copy of the GNU General Public License
22along with this program; if not, write to the Free Software
23Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
24
25*/
26
27#include "demac_config.h"
28
29/* NOTE: The following need to be kept in sync with parser.h */
30
31#define YDELAYA 200
32#define YDELAYB 168
33#define XDELAYA 136
34#define XDELAYB 104
35#define YADAPTCOEFFSA 72
36#define XADAPTCOEFFSA 56
37#define YADAPTCOEFFSB 40
38#define XADAPTCOEFFSB 20
39
40/* struct predictor_t members: */
41#define buf 0 /* int32_t* buf */
42
43#define YlastA 4 /* int32_t YlastA; */
44#define XlastA 8 /* int32_t XlastA; */
45
46#define YfilterB 12 /* int32_t YfilterB; */
47#define XfilterA 16 /* int32_t XfilterA; */
48
49#define XfilterB 20 /* int32_t XfilterB; */
50#define YfilterA 24 /* int32_t YfilterA; */
51
52#define YcoeffsA 28 /* int32_t YcoeffsA[4]; */
53#define XcoeffsA 44 /* int32_t XcoeffsA[4]; */
54#define YcoeffsB 60 /* int32_t YcoeffsB[5]; */
55#define XcoeffsB 80 /* int32_t XcoeffsB[5]; */
56
57#define historybuffer 100 /* int32_t historybuffer[] */
58
59
60 .text
61
62 .align 2
63
64 .global predictor_decode_stereo
65 .type predictor_decode_stereo,@function
66
67| void predictor_decode_stereo(struct predictor_t* p,
68| int32_t* decoded0,
69| int32_t* decoded1,
70| int count)
|
| Register usage inside the loop, as set up by the prologue below:
|   %a3 = decoded0, %a4 = decoded1, %a5 = p->buf, %a6 = p
|   (%sp) = remaining loop count
|   %acc0 / %acc1 = predictionA / predictionB of the current predictor
| Each pass overwrites *decoded0 and *decoded1 with the updated
| p->YfilterA and p->XfilterA and advances both pointers.
| The count is only tested after being decremented at the bottom of the
| loop; there is no up-front check for count == 0.
71
72predictor_decode_stereo:
73 lea.l (-12*4,%sp), %sp
74 movem.l %d2-%d7/%a2-%a6, (4,%sp)
75
76 movem.l (12*4+8,%sp), %a3-%a5 | %a3 = decoded0
77 | %a4 = decoded1
78 move.l %a5, (%sp) | (%sp) = count
79
80 move.l #0, %macsr | signed integer mode
81 move.l (12*4+4,%sp), %a6 | %a6 = p
82 move.l (%a6), %a5 | %a5 = p->buf
83
84.loop:
85
86 | ***** PREDICTOR Y *****
87
88 | Predictor Y, Filter A
89
90 move.l (YlastA,%a6), %d3 | %d3 = p->YlastA
91
92 movem.l (YDELAYA-12,%a5), %d0-%d2 | %d0 = p->buf[YDELAYA-3]
93 | %d1 = p->buf[YDELAYA-2]
94 | %d2 = p->buf[YDELAYA-1]
95
96 move.l %d3, (YDELAYA,%a5) | p->buf[YDELAYA] = %d3
97
98 sub.l %d3, %d2
99 neg.l %d2 | %d2 = %d3 - %d2
100
101 move.l %d2, (YDELAYA-4,%a5) | p->buf[YDELAYA-1] = %d2
102
103 movem.l (YcoeffsA,%a6), %d4-%d7 | %d4 = p->YcoeffsA[0]
104 | %d5 = p->YcoeffsA[1]
105 | %d6 = p->YcoeffsA[2]
106 | %d7 = p->YcoeffsA[3]
107
108 mac.l %d3, %d4, %acc0 | %acc0 = p->buf[YDELAYA] * p->YcoeffsA[0]
109 mac.l %d2, %d5, %acc0 | %acc0 += p->buf[YDELAYA-1] * p->YcoeffsA[1]
110 mac.l %d1, %d6, %acc0 | %acc0 += p->buf[YDELAYA-2] * p->YcoeffsA[2]
111 mac.l %d0, %d7, %acc0 | %acc0 += p->buf[YDELAYA-3] * p->YcoeffsA[3]
112
113 tst.l %d2
114 beq.s 1f
115 spl.b %d2 | pos: 0x??????ff, neg: 0x??????00
116 extb.l %d2 | pos: 0xffffffff, neg: 0x00000000
117 or.l #1, %d2 | pos: 0xffffffff, neg: 0x00000001
1181: | %d2 = SIGN(%d2)
119 move.l %d2, (YADAPTCOEFFSA-4,%a5) | p->buf[YADAPTCOEFFSA-1] = %d2
120
121 tst.l %d3
122 beq.s 1f
123 spl.b %d3
124 extb.l %d3
125 or.l #1, %d3
1261: | %d3 = SIGN(%d3)
127 move.l %d3, (YADAPTCOEFFSA,%a5) | p->buf[YADAPTCOEFFSA] = %d3
128
129 | Predictor Y, Filter B
130
131 movem.l (YfilterB,%a6), %d2-%d3 | %d2 = p->YfilterB
132 | %d3 = p->XfilterA
133 move.l %d3, (YfilterB,%a6) | p->YfilterB = %d3
134
135 move.l %d2, %d1 | %d1 = %d2
136 lsl.l #5, %d2 | %d2 = %d2 * 32
137 sub.l %d1, %d2 | %d2 -= %d1 (== 31 * old_d2)
138 asr.l #5, %d2 | %d2 >>= 5
139 sub.l %d2, %d3 | %d3 -= %d2
140
141 movem.l (YDELAYB-16,%a5), %d4-%d7 | %d4 = p->buf[YDELAYB-4]
142 | %d5 = p->buf[YDELAYB-3]
143 | %d6 = p->buf[YDELAYB-2]
144 | %d7 = p->buf[YDELAYB-1]
145 sub.l %d3, %d7
146 neg.l %d7 | %d7 = %d3 - %d7
147
148 move.l %d7, (YDELAYB-4,%a5) | p->buf[YDELAYB-1] = %d7
149
150 movem.l (YcoeffsB,%a6), %d1-%d2/%a0-%a2 | %d1 = p->YcoeffsB[0]
151 | %d2 = p->YcoeffsB[1]
152 | %a0 = p->YcoeffsB[2]
153 | %a1 = p->YcoeffsB[3]
154 | %a2 = p->YcoeffsB[4]
155
156 mac.l %d3, %d1, %acc1 | %acc1 = p->buf[YDELAYB] * p->YcoeffsB[0]
157 mac.l %d7, %d2, %acc1 | %acc1 += p->buf[YDELAYB-1] * p->YcoeffsB[1]
158 mac.l %d6, %a0, %acc1 | %acc1 += p->buf[YDELAYB-2] * p->YcoeffsB[2]
159 mac.l %d5, %a1, %acc1 | %acc1 += p->buf[YDELAYB-3] * p->YcoeffsB[3]
160 mac.l %d4, %a2, %acc1 | %acc1 += p->buf[YDELAYB-4] * p->YcoeffsB[4]
161
162 move.l %d3, (YDELAYB, %a5) | p->buf[YDELAYB] = %d3
163
164 tst.l %d7
165 beq.s 1f
166 spl.b %d7
167 extb.l %d7
168 or.l #1, %d7
1691: | %d7 = SIGN(%d7)
170 move.l %d7, (YADAPTCOEFFSB-4,%a5) | p->buf[YADAPTCOEFFSB-1] = %d7
171 tst.l %d3
172 beq.s 1f
173 spl.b %d3
174 extb.l %d3
175 or.l #1, %d3
1761: | %d3 = SIGN(%d3)
177 move.l %d3, (YADAPTCOEFFSB, %a5) | p->buf[YADAPTCOEFFSB] = %d3
178
179 | %d1, %d2, %a0, %a1, %a2 contain p->YcoeffsB[0..4]
180 | %d7, %d3 contain p->buf[YADAPTCOEFFSB-1] and p->buf[YADAPTCOEFFSB]
181
182 move.l (%a3), %d0 | %d0 = *decoded0
183 beq.s 3f
184
185 movem.l (YADAPTCOEFFSB-16,%a5), %d4-%d6 | %d4 = p->buf[YADAPTCOEFFSB-4]
186 | %d5 = p->buf[YADAPTCOEFFSB-3]
187 | %d6 = p->buf[YADAPTCOEFFSB-2]
188
189 bmi.s 1f | flags still valid here
190
191 | *decoded0 > 0
192
193 sub.l %d3, %d1 | %d1 = p->YcoeffsB[0] - p->buf[YADAPTCOEFFSB]
194 sub.l %d7, %d2 | %d2 = p->YcoeffsB[1] - p->buf[YADAPTCOEFFSB-1]
195 sub.l %d6, %a0 | %a0 = p->YcoeffsB[2] - p->buf[YADAPTCOEFFSB-2]
196 sub.l %d5, %a1 | %a1 = p->YcoeffsB[3] - p->buf[YADAPTCOEFFSB-3]
197 sub.l %d4, %a2 | %a2 = p->YcoeffsB[4] - p->buf[YADAPTCOEFFSB-4]
198
199 movem.l %d1-%d2/%a0-%a2, (YcoeffsB,%a6) | Save p->YcoeffsB[]
200
201 movem.l (YcoeffsA,%a6), %d4-%d7 | %d4 = p->YcoeffsA[0]
202 | %d5 = p->YcoeffsA[1]
203 | %d6 = p->YcoeffsA[2]
204 | %d7 = p->YcoeffsA[3]
205
206 movem.l (YADAPTCOEFFSA-12,%a5), %d2/%a0-%a2
207 | %d2 = p->buf[YADAPTCOEFFSA-3]
208 | %a0 = p->buf[YADAPTCOEFFSA-2]
209 | %a1 = p->buf[YADAPTCOEFFSA-1]
210 | %a2 = p->buf[YADAPTCOEFFSA]
211
212 sub.l %a2, %d4 | %d4 = p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA]
213 sub.l %a1, %d5 | %d5 = p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1]
214 sub.l %a0, %d6 | %d6 = p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2]
215 sub.l %d2, %d7 | %d7 = p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3]
216
217 bra.s 2f
218
2191: | *decoded0 < 0
220
221 add.l %d3, %d1 | %d1 = p->YcoeffsB[0] + p->buf[YADAPTCOEFFSB]
222 add.l %d7, %d2 | %d2 = p->YcoeffsB[1] + p->buf[YADAPTCOEFFSB-1]
223 add.l %d6, %a0 | %a0 = p->YcoeffsB[2] + p->buf[YADAPTCOEFFSB-2]
224 add.l %d5, %a1 | %a1 = p->YcoeffsB[3] + p->buf[YADAPTCOEFFSB-3]
225 add.l %d4, %a2 | %a2 = p->YcoeffsB[4] + p->buf[YADAPTCOEFFSB-4]
226
227 movem.l %d1-%d2/%a0-%a2, (YcoeffsB,%a6) | Save p->YcoeffsB[]
228
229 movem.l (YcoeffsA,%a6), %d4-%d7 | %d4 = p->YcoeffsA[0]
230 | %d5 = p->YcoeffsA[1]
231 | %d6 = p->YcoeffsA[2]
232 | %d7 = p->YcoeffsA[3]
233
234 movem.l (YADAPTCOEFFSA-12,%a5), %d2/%a0-%a2
235 | %d2 = p->buf[YADAPTCOEFFSA-3]
236 | %a0 = p->buf[YADAPTCOEFFSA-2]
237 | %a1 = p->buf[YADAPTCOEFFSA-1]
238 | %a2 = p->buf[YADAPTCOEFFSA]
239
240 add.l %a2, %d4 | %d4 = p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA]
241 add.l %a1, %d5 | %d5 = p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1]
242 add.l %a0, %d6 | %d6 = p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2]
243 add.l %d2, %d7 | %d7 = p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3]
244
2452:
246 movem.l %d4-%d7, (YcoeffsA,%a6) | Save p->YcoeffsA[]
247
2483:
249 | Finish Predictor Y
250
251 movclr.l %acc0, %d1 | %d1 = predictionA
252 movclr.l %acc1, %d2 | %d2 = predictionB
253 asr.l #1, %d2
254 add.l %d2, %d1 | %d1 += (%d2 >> 1)
255 asr.l #8, %d1
256 asr.l #2, %d1 | %d1 >>= 10
257 add.l %d0, %d1 | %d1 += %d0
258 move.l %d1, (YlastA,%a6) | p->YlastA = %d1
259
260 move.l (YfilterA,%a6), %d2 | %d2 = p->YfilterA
261 move.l %d2, %d0
262 lsl.l #5, %d2
263 sub.l %d0, %d2 | %d2 = 31 * %d2
264 asr.l #5, %d2 | %d2 >>= 5
265 add.l %d1, %d2
266 move.l %d2, (YfilterA,%a6) | p->YfilterA = %d2
267
268 | *decoded0 stored 2 instructions down, avoiding pipeline stall
269
270 | ***** PREDICTOR X *****
271
272 | Predictor X, Filter A
273
274 move.l (XlastA,%a6), %d3 | %d3 = p->XlastA
275
276 move.l %d2, (%a3)+ | *(decoded0++) = %d2 (p->YfilterA)
277
278 movem.l (XDELAYA-12,%a5), %d0-%d2 | %d0 = p->buf[XDELAYA-3]
279 | %d1 = p->buf[XDELAYA-2]
280 | %d2 = p->buf[XDELAYA-1]
281
282 move.l %d3, (XDELAYA,%a5) | p->buf[XDELAYA] = %d3
283
284 sub.l %d3, %d2
285 neg.l %d2 | %d2 = %d3 -%d2
286
287 move.l %d2, (XDELAYA-4,%a5) | p->buf[XDELAYA-1] = %d2
288
289 movem.l (XcoeffsA,%a6), %d4-%d7 | %d4 = p->XcoeffsA[0]
290 | %d5 = p->XcoeffsA[1]
291 | %d6 = p->XcoeffsA[2]
292 | %d7 = p->XcoeffsA[3]
293
294 mac.l %d3, %d4, %acc0 | %acc0 = p->buf[XDELAYA] * p->XcoeffsA[0]
295 mac.l %d2, %d5, %acc0 | %acc0 += p->buf[XDELAYA-1] * p->XcoeffsA[1]
296 mac.l %d1, %d6, %acc0 | %acc0 += p->buf[XDELAYA-2] * p->XcoeffsA[2]
297 mac.l %d0, %d7, %acc0 | %acc0 += p->buf[XDELAYA-3] * p->XcoeffsA[3]
298
299 tst.l %d2
300 beq.s 1f
301 spl.b %d2 | pos: 0x??????ff, neg: 0x??????00
302 extb.l %d2 | pos: 0xffffffff, neg: 0x00000000
303 or.l #1, %d2 | pos: 0xffffffff, neg: 0x00000001
3041: | %d2 = SIGN(%d2)
305 move.l %d2, (XADAPTCOEFFSA-4,%a5) | p->buf[XADAPTCOEFFSA-1] = %d2
306
307 tst.l %d3
308 beq.s 1f
309 spl.b %d3
310 extb.l %d3
311 or.l #1, %d3
3121: | %d3 = SIGN(%d3)
313 move.l %d3, (XADAPTCOEFFSA,%a5) | p->buf[XADAPTCOEFFSA] = %d3
314
315 | Predictor X, Filter B
316
317 movem.l (XfilterB,%a6), %d2-%d3 | %d2 = p->XfilterB
318 | %d3 = p->YfilterA
319 move.l %d3, (XfilterB,%a6) | p->XfilterB = %d3
320
321 move.l %d2, %d1 | %d1 = %d2
322 lsl.l #5, %d2 | %d2 = %d2 * 32
323 sub.l %d1, %d2 | %d2 -= %d1 (== 31 * old_d2)
324 asr.l #5, %d2 | %d2 >>= 5
325 sub.l %d2, %d3 | %d3 -= %d2
326
327 movem.l (XDELAYB-16,%a5), %d4-%d7 | %d4 = p->buf[XDELAYB-4]
328 | %d5 = p->buf[XDELAYB-3]
329 | %d6 = p->buf[XDELAYB-2]
330 | %d7 = p->buf[XDELAYB-1]
331 sub.l %d3, %d7
332 neg.l %d7 | %d7 = %d3 - %d7
333
334 move.l %d7, (XDELAYB-4,%a5) | p->buf[XDELAYB-1] = %d7
335
336 movem.l (XcoeffsB,%a6), %d1-%d2/%a0-%a2 | %d1 = p->XcoeffsB[0]
337 | %d2 = p->XcoeffsB[1]
338 | %a0 = p->XcoeffsB[2]
339 | %a1 = p->XcoeffsB[3]
340 | %a2 = p->XcoeffsB[4]
341
342 mac.l %d3, %d1, %acc1 | %acc1 = p->buf[XDELAYB] * p->XcoeffsB[0]
343 mac.l %d7, %d2, %acc1 | %acc1 += p->buf[XDELAYB-1] * p->XcoeffsB[1]
344 mac.l %d6, %a0, %acc1 | %acc1 += p->buf[XDELAYB-2] * p->XcoeffsB[2]
345 mac.l %d5, %a1, %acc1 | %acc1 += p->buf[XDELAYB-3] * p->XcoeffsB[3]
346 mac.l %d4, %a2, %acc1 | %acc1 += p->buf[XDELAYB-4] * p->XcoeffsB[4]
347
348 move.l %d3, (XDELAYB, %a5) | p->buf[XDELAYB] = %d3
349
350 tst.l %d7
351 beq.s 1f
352 spl.b %d7
353 extb.l %d7
354 or.l #1, %d7
3551: | %d7 = SIGN(%d7)
356 move.l %d7, (XADAPTCOEFFSB-4,%a5) | p->buf[XADAPTCOEFFSB-1] = %d7
357
358 tst.l %d3
359 beq.s 1f
360 spl.b %d3
361 extb.l %d3
362 or.l #1, %d3
3631: | %d3 = SIGN(%d3)
364 move.l %d3, (XADAPTCOEFFSB, %a5) | p->buf[XADAPTCOEFFSB] = %d3
365
366 | %d1, %d2, %a0, %a1, %a2 contain p->XcoeffsB[0..4]
367 | %d7, %d3 contain p->buf[XADAPTCOEFFSB-1] and p->buf[XADAPTCOEFFSB]
368
369 move.l (%a4), %d0 | %d0 = *decoded1
370 beq.s 3f
371
372 movem.l (XADAPTCOEFFSB-16,%a5), %d4-%d6 | %d4 = p->buf[XADAPTCOEFFSB-4]
373 | %d5 = p->buf[XADAPTCOEFFSB-3]
374 | %d6 = p->buf[XADAPTCOEFFSB-2]
375
376 bmi.s 1f | flags still valid here
377
378 | *decoded1 > 0
379
380 sub.l %d3, %d1 | %d1 = p->XcoeffsB[0] - p->buf[XADAPTCOEFFSB]
381 sub.l %d7, %d2 | %d2 = p->XcoeffsB[1] - p->buf[XADAPTCOEFFSB-1]
382 sub.l %d6, %a0 | %a0 = p->XcoeffsB[2] - p->buf[XADAPTCOEFFSB-2]
383 sub.l %d5, %a1 | %a1 = p->XcoeffsB[3] - p->buf[XADAPTCOEFFSB-3]
384 sub.l %d4, %a2 | %a2 = p->XcoeffsB[4] - p->buf[XADAPTCOEFFSB-4]
385
386 movem.l %d1-%d2/%a0-%a2, (XcoeffsB,%a6) | Save p->XcoeffsB[]
387
388 movem.l (XcoeffsA,%a6), %d4-%d7 | %d4 = p->XcoeffsA[0]
389 | %d5 = p->XcoeffsA[1]
390 | %d6 = p->XcoeffsA[2]
391 | %d7 = p->XcoeffsA[3]
392
393 movem.l (XADAPTCOEFFSA-12,%a5), %d2/%a0-%a2
394 | %d2 = p->buf[XADAPTCOEFFSA-3]
395 | %a0 = p->buf[XADAPTCOEFFSA-2]
396 | %a1 = p->buf[XADAPTCOEFFSA-1]
397 | %a2 = p->buf[XADAPTCOEFFSA]
398
399 sub.l %a2, %d4 | %d4 = p->XcoeffsA[0] - p->buf[XADAPTCOEFFSA]
400 sub.l %a1, %d5 | %d5 = p->XcoeffsA[1] - p->buf[XADAPTCOEFFSA-1]
401 sub.l %a0, %d6 | %d6 = p->XcoeffsA[2] - p->buf[XADAPTCOEFFSA-2]
402 sub.l %d2, %d7 | %d7 = p->XcoeffsA[3] - p->buf[XADAPTCOEFFSA-3]
403
404 bra.s 2f
405
4061: | *decoded1 < 0
407
408 add.l %d3, %d1 | %d1 = p->XcoeffsB[0] + p->buf[XADAPTCOEFFSB]
409 add.l %d7, %d2 | %d2 = p->XcoeffsB[1] + p->buf[XADAPTCOEFFSB-1]
410 add.l %d6, %a0 | %a0 = p->XcoeffsB[2] + p->buf[XADAPTCOEFFSB-2]
411 add.l %d5, %a1 | %a1 = p->XcoeffsB[3] + p->buf[XADAPTCOEFFSB-3]
412 add.l %d4, %a2 | %a2 = p->XcoeffsB[4] + p->buf[XADAPTCOEFFSB-4]
413
414 movem.l %d1-%d2/%a0-%a2, (XcoeffsB,%a6) | Save p->XcoeffsB[]
415
416 movem.l (XcoeffsA,%a6), %d4-%d7 | %d4 = p->XcoeffsA[0]
417 | %d5 = p->XcoeffsA[1]
418 | %d6 = p->XcoeffsA[2]
419 | %d7 = p->XcoeffsA[3]
420
421 movem.l (XADAPTCOEFFSA-12,%a5), %d2/%a0-%a2
422 | %d2 = p->buf[XADAPTCOEFFSA-3]
423 | %a0 = p->buf[XADAPTCOEFFSA-2]
424 | %a1 = p->buf[XADAPTCOEFFSA-1]
425 | %a2 = p->buf[XADAPTCOEFFSA]
426
427 add.l %a2, %d4 | %d4 = p->XcoeffsA[0] + p->buf[XADAPTCOEFFSA]
428 add.l %a1, %d5 | %d5 = p->XcoeffsA[1] + p->buf[XADAPTCOEFFSA-1]
429 add.l %a0, %d6 | %d6 = p->XcoeffsA[2] + p->buf[XADAPTCOEFFSA-2]
430 add.l %d2, %d7 | %d7 = p->XcoeffsA[3] + p->buf[XADAPTCOEFFSA-3]
431
4322:
433 movem.l %d4-%d7, (XcoeffsA,%a6) | Save p->XcoeffsA[]
434
4353:
436 | Finish Predictor X
437
438 movclr.l %acc0, %d1 | %d1 = predictionA
439 movclr.l %acc1, %d2 | %d2 = predictionB
440 asr.l #1, %d2
441 add.l %d2, %d1 | %d1 += (%d2 >> 1)
442 asr.l #8, %d1
443 asr.l #2, %d1 | %d1 >>= 10
444 add.l %d0, %d1 | %d1 += %d0
445 move.l %d1, (XlastA,%a6) | p->XlastA = %d1
446
447 move.l (XfilterA,%a6), %d2 | %d2 = p->XfilterA
448 move.l %d2, %d0
449 lsl.l #5, %d2
450 sub.l %d0, %d2 | %d2 = 31 * %d2
451 asr.l #5, %d2 | %d2 >>= 5
452 add.l %d1, %d2
453 move.l %d2, (XfilterA,%a6) | p->XfilterA = %d2
454
455 | *decoded1 stored 3 instructions down, avoiding pipeline stall
456
457 | ***** COMMON *****
458
459 addq.l #4, %a5 | p->buf++
460 lea.l (historybuffer+PREDICTOR_HISTORY_SIZE*4,%a6), %a2
461 | %a2 = &p->historybuffer[PREDICTOR_HISTORY_SIZE]
462
463 move.l %d2, (%a4)+ | *(decoded1++) = %d2 (p->XfilterA)
464
465 cmp.l %a2, %a5
466 beq.s .move_hist | History buffer is full, we need to do a memmove
467
468 subq.l #1, (%sp) | decrease loop count
469 bne.w .loop
470
471.done:
472 move.l %a5, (%a6) | Save value of p->buf
473 movem.l (4,%sp), %d2-%d7/%a2-%a6
474 lea.l (12*4,%sp), %sp
475 rts
476
477.move_hist:
478 lea.l (historybuffer,%a6), %a2
479
480 | dest = %a2 (p->historybuffer)
481 | src = %a5 (p->buf)
482 | n = 200
483
484 movem.l (%a5), %d0-%d7/%a0-%a1 | 40 bytes
485 movem.l %d0-%d7/%a0-%a1, (%a2)
486 movem.l (40,%a5), %d0-%d7/%a0-%a1 | 40 bytes
487 movem.l %d0-%d7/%a0-%a1, (40,%a2)
488 movem.l (80,%a5), %d0-%d7/%a0-%a1 | 40 bytes
489 movem.l %d0-%d7/%a0-%a1, (80,%a2)
490 movem.l (120,%a5), %d0-%d7/%a0-%a1 | 40 bytes
491 movem.l %d0-%d7/%a0-%a1, (120,%a2)
492 movem.l (160,%a5), %d0-%d7/%a0-%a1 | 40 bytes
493 movem.l %d0-%d7/%a0-%a1, (160,%a2)
494
495 move.l %a2, %a5 | p->buf = &p->historybuffer[0]
496
497 subq.l #1, (%sp) | decrease loop count
498 bne.w .loop
499
500 bra.s .done
501 .size predictor_decode_stereo, .-predictor_decode_stereo
502
503
504 .global predictor_decode_mono
505 .type predictor_decode_mono,@function
506
507| void predictor_decode_mono(struct predictor_t* p,
508| int32_t* decoded0,
509| int count)
|
| Register usage inside the loop, as set up by the prologue below:
|   %a4 = decoded0, %a5 = p->buf, %a6 = p, %d7 = remaining loop count
|   %d3 = running value of p->YlastA at the top of each pass; it is only
|         written back to p on exit and before a history-buffer move
| Each pass overwrites *decoded0 with the updated p->YfilterA and
| advances the pointer.
| The count is only tested after being decremented at the bottom of the
| loop; there is no up-front check for count == 0.
510
511predictor_decode_mono:
512 lea.l (-11*4,%sp), %sp
513 movem.l %d2-%d7/%a2-%a6, (%sp)
514
515 move.l #0, %macsr | signed integer mode
516
517 move.l (11*4+4,%sp), %a6 | %a6 = p
518 move.l (11*4+8,%sp), %a4 | %a4 = decoded0
519 move.l (11*4+12,%sp), %d7 | %d7 = count
520 move.l (%a6), %a5 | %a5 = p->buf
521
522 move.l (YlastA,%a6), %d3 | %d3 = p->YlastA
523
524.loopm:
525
526 | ***** PREDICTOR *****
527
528 movem.l (YDELAYA-12,%a5), %d0-%d2 | %d0 = p->buf[YDELAYA-3]
529 | %d1 = p->buf[YDELAYA-2]
530 | %d2 = p->buf[YDELAYA-1]
531
532 move.l %d3, (YDELAYA,%a5) | p->buf[YDELAYA] = %d3
533
534 sub.l %d3, %d2
535 neg.l %d2 | %d2 = %d3 - %d2
536
537 move.l %d2, (YDELAYA-4,%a5) | p->buf[YDELAYA-1] = %d2
538
539 movem.l (YcoeffsA,%a6), %a0-%a3 | %a0 = p->YcoeffsA[0]
540 | %a1 = p->YcoeffsA[1]
541 | %a2 = p->YcoeffsA[2]
542 | %a3 = p->YcoeffsA[3]
543
544 mac.l %d3, %a0, %acc0 | %acc0 = p->buf[YDELAYA] * p->YcoeffsA[0]
545 mac.l %d2, %a1, %acc0 | %acc0 += p->buf[YDELAYA-1] * p->YcoeffsA[1]
546 mac.l %d1, %a2, %acc0 | %acc0 += p->buf[YDELAYA-2] * p->YcoeffsA[2]
547 mac.l %d0, %a3, %acc0 | %acc0 += p->buf[YDELAYA-3] * p->YcoeffsA[3]
548
549 tst.l %d2
550 beq.s 1f
551 spl.b %d2 | pos: 0x??????ff, neg: 0x??????00
552 extb.l %d2 | pos: 0xffffffff, neg: 0x00000000
553 or.l #1, %d2 | pos: 0xffffffff, neg: 0x00000001
5541: | %d2 = SIGN(%d2)
555 move.l %d2, (YADAPTCOEFFSA-4,%a5) | p->buf[YADAPTCOEFFSA-1] = %d2
556
557 tst.l %d3
558 beq.s 1f
559 spl.b %d3
560 extb.l %d3
561 or.l #1, %d3
5621: | %d3 = SIGN(%d3)
563 move.l %d3, (YADAPTCOEFFSA,%a5) | p->buf[YADAPTCOEFFSA] = %d3
564
565 move.l (%a4), %d0 | %d0 = *decoded0
566 beq.s 3f
567
568 movem.l (YADAPTCOEFFSA-12,%a5),%d4-%d5 | %d4 = p->buf[YADAPTCOEFFSA-3]
569 | %d5 = p->buf[YADAPTCOEFFSA-2]
570
571 bmi.s 1f | flags still valid here
572
573 | *decoded0 > 0
574
575 sub.l %d3, %a0 | %a0 = p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA]
576 sub.l %d2, %a1 | %a1 = p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1]
577 sub.l %d5, %a2 | %a2 = p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2]
578 sub.l %d4, %a3 | %a3 = p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3]
579
580 bra.s 2f
581
5821: | *decoded0 < 0
583
584 add.l %d3, %a0 | %a0 = p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA]
585 add.l %d2, %a1 | %a1 = p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1]
586 add.l %d5, %a2 | %a2 = p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2]
587 add.l %d4, %a3 | %a3 = p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3]
588
5892:
590 movem.l %a0-%a3, (YcoeffsA,%a6) | save p->YcoeffsA[]
591
5923:
593 | Finish Predictor
594
595 movclr.l %acc0, %d3 | %d3 = predictionA
596 asr.l #8, %d3
597 asr.l #2, %d3 | %d3 >>= 10
598 add.l %d0, %d3 | %d3 += %d0
599
600 move.l (YfilterA,%a6), %d2 | %d2 = p->YfilterA
601 move.l %d2, %d0
602 lsl.l #5, %d2
603 sub.l %d0, %d2 | %d2 = 31 * %d2
604 asr.l #5, %d2 | %d2 >>= 5
605 add.l %d3, %d2
606 move.l %d2, (YfilterA,%a6) | p->YfilterA = %d2
607
608 | *decoded0 stored 3 instructions down, avoiding pipeline stall
609
610 | ***** COMMON *****
611
612 addq.l #4, %a5 | p->buf++
613 lea.l (historybuffer+PREDICTOR_HISTORY_SIZE*4,%a6), %a3
614 | %a3 = &p->historybuffer[PREDICTOR_HISTORY_SIZE]
615
616 move.l %d2, (%a4)+ | *(decoded0++) = %d2 (p->YfilterA)
617
618 cmp.l %a3, %a5
619 beq.s .move_histm | History buffer is full, we need to do a memmove
620
621 subq.l #1, %d7 | decrease loop count
622 bne.w .loopm
623
624 move.l %d3, (YlastA,%a6) | p->YlastA = %d3
625
626.donem:
627 move.l %a5, (%a6) | Save value of p->buf
628 movem.l (%sp), %d2-%d7/%a2-%a6
629 lea.l (11*4,%sp), %sp
630 rts
631
632.move_histm:
633 move.l %d3, (YlastA,%a6) | p->YlastA = %d3 (%d3 is clobbered by the copy below)
634
635 lea.l (historybuffer,%a6), %a3
636
637 | dest = %a3 (p->historybuffer)
638 | src = %a5 (p->buf)
639 | n = 200
640
641 movem.l (%a5), %d0-%d6/%a0-%a2 | 40 bytes
642 movem.l %d0-%d6/%a0-%a2, (%a3)
643 movem.l (40,%a5), %d0-%d6/%a0-%a2 | 40 bytes
644 movem.l %d0-%d6/%a0-%a2, (40,%a3)
645 movem.l (80,%a5), %d0-%d6/%a0-%a2 | 40 bytes
646 movem.l %d0-%d6/%a0-%a2, (80,%a3)
647 movem.l (120,%a5), %d0-%d6/%a0-%a2 | 40 bytes
648 movem.l %d0-%d6/%a0-%a2, (120,%a3)
649 movem.l (160,%a5), %d0-%d6/%a0-%a2 | 40 bytes
650 movem.l %d0-%d6/%a0-%a2, (160,%a3)
651
652 move.l %a3, %a5 | p->buf = &p->historybuffer[0]
653
654 move.l (YlastA,%a6), %d3 | %d3 = p->YlastA
655
656 subq.l #1, %d7 | decrease loop count
657 bne.w .loopm
658
659 bra.s .donem
660 .size predictor_decode_mono, .-predictor_decode_mono
diff --git a/lib/rbcodec/codecs/demac/libdemac/predictor.c b/lib/rbcodec/codecs/demac/libdemac/predictor.c
new file mode 100644
index 0000000000..45912dddbd
--- /dev/null
+++ b/lib/rbcodec/codecs/demac/libdemac/predictor.c
@@ -0,0 +1,271 @@
1/*
2
3libdemac - A Monkey's Audio decoder
4
5$Id$
6
7Copyright (C) Dave Chapman 2007
8
9This program is free software; you can redistribute it and/or modify
10it under the terms of the GNU General Public License as published by
11the Free Software Foundation; either version 2 of the License, or
12(at your option) any later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
22
23*/
24
25#include <inttypes.h>
26#include <string.h>
27
28#include "parser.h"
29#include "predictor.h"
30#include "demac_config.h"
31
/* Return 0 if x is zero, -1 if x is positive, 1 if x is negative.
   The whole expansion is parenthesised so the macro keeps its meaning when
   it is used as an operand of a larger expression (e.g. 1 + SIGN(v)).
   Note that x may be evaluated twice, so it must not have side effects. */
#define SIGN(x) ((x) ? (((x) > 0) ? -1 : 1) : 0)
34
35static const int32_t initial_coeffs[4] = {
36 360, 317, -109, 98
37};
38
39#define YDELAYA (18 + PREDICTOR_ORDER*4)
40#define YDELAYB (18 + PREDICTOR_ORDER*3)
41#define XDELAYA (18 + PREDICTOR_ORDER*2)
42#define XDELAYB (18 + PREDICTOR_ORDER)
43
44#define YADAPTCOEFFSA (18)
45#define XADAPTCOEFFSA (14)
46#define YADAPTCOEFFSB (10)
47#define XADAPTCOEFFSB (5)
48
49void init_predictor_decoder(struct predictor_t* p)
50{
51 /* Zero the history buffers */
52 memset(p->historybuffer, 0, PREDICTOR_SIZE * sizeof(int32_t));
53 p->buf = p->historybuffer;
54
55 /* Initialise and zero the co-efficients */
56 memcpy(p->YcoeffsA, initial_coeffs, sizeof(initial_coeffs));
57 memcpy(p->XcoeffsA, initial_coeffs, sizeof(initial_coeffs));
58 memset(p->YcoeffsB, 0, sizeof(p->YcoeffsB));
59 memset(p->XcoeffsB, 0, sizeof(p->XcoeffsB));
60
61 p->YfilterA = 0;
62 p->YfilterB = 0;
63 p->YlastA = 0;
64
65 p->XfilterA = 0;
66 p->XfilterB = 0;
67 p->XlastA = 0;
68}
69
70#if !defined(CPU_ARM) && !defined(CPU_COLDFIRE)
/* Portable C version of the stereo predictor (the enclosing #if leaves it
   out when CPU_ARM or CPU_COLDFIRE is defined).

   Runs `count` iterations. Each iteration reads one residual from
   *decoded0 and one from *decoded1, updates the Y and X predictor state
   held in *p, and overwrites the two inputs with the new p->YfilterA and
   p->XfilterA.

   p        predictor state; p->buf advances by one entry per iteration
   decoded0 residuals for predictor Y in, p->YfilterA values out
   decoded1 residuals for predictor X in, p->XfilterA values out
   count    number of iterations to run */
71void ICODE_ATTR_DEMAC predictor_decode_stereo(struct predictor_t* p,
72 int32_t* decoded0,
73 int32_t* decoded1,
74 int count)
75{
76 int32_t predictionA, predictionB;
77
78 while (LIKELY(count--))
79 {
80 /* Predictor Y */
81 p->buf[YDELAYA] = p->YlastA;
82 p->buf[YADAPTCOEFFSA] = SIGN(p->buf[YDELAYA]);
83
/* Slot [YDELAYA-1] is replaced in place by the difference to the new value */
84 p->buf[YDELAYA-1] = p->buf[YDELAYA] - p->buf[YDELAYA-1];
85 p->buf[YADAPTCOEFFSA-1] = SIGN(p->buf[YDELAYA-1]);
86
87 predictionA = (p->buf[YDELAYA] * p->YcoeffsA[0]) +
88 (p->buf[YDELAYA-1] * p->YcoeffsA[1]) +
89 (p->buf[YDELAYA-2] * p->YcoeffsA[2]) +
90 (p->buf[YDELAYA-3] * p->YcoeffsA[3]);
91
92 /* Apply a scaled first-order filter compression */
/* Filter B of Y is fed from p->XfilterA, which at this point still holds
   the value stored by the previous iteration */
93 p->buf[YDELAYB] = p->XfilterA - ((p->YfilterB * 31) >> 5);
94 p->buf[YADAPTCOEFFSB] = SIGN(p->buf[YDELAYB]);
95 p->YfilterB = p->XfilterA;
96
97 p->buf[YDELAYB-1] = p->buf[YDELAYB] - p->buf[YDELAYB-1];
98 p->buf[YADAPTCOEFFSB-1] = SIGN(p->buf[YDELAYB-1]);
99
100 predictionB = (p->buf[YDELAYB] * p->YcoeffsB[0]) +
101 (p->buf[YDELAYB-1] * p->YcoeffsB[1]) +
102 (p->buf[YDELAYB-2] * p->YcoeffsB[2]) +
103 (p->buf[YDELAYB-3] * p->YcoeffsB[3]) +
104 (p->buf[YDELAYB-4] * p->YcoeffsB[4]);
105
106 p->YlastA = *decoded0 + ((predictionA + (predictionB >> 1)) >> 10);
107 p->YfilterA = p->YlastA + ((p->YfilterA * 31) >> 5);
108
109 /* Predictor X */
110
111 p->buf[XDELAYA] = p->XlastA;
112 p->buf[XADAPTCOEFFSA] = SIGN(p->buf[XDELAYA]);
113 p->buf[XDELAYA-1] = p->buf[XDELAYA] - p->buf[XDELAYA-1];
114 p->buf[XADAPTCOEFFSA-1] = SIGN(p->buf[XDELAYA-1]);
115
116 predictionA = (p->buf[XDELAYA] * p->XcoeffsA[0]) +
117 (p->buf[XDELAYA-1] * p->XcoeffsA[1]) +
118 (p->buf[XDELAYA-2] * p->XcoeffsA[2]) +
119 (p->buf[XDELAYA-3] * p->XcoeffsA[3]);
120
121 /* Apply a scaled first-order filter compression */
/* Unlike the Y case above, p->YfilterA has already been updated for the
   current iteration when it is read here */
122 p->buf[XDELAYB] = p->YfilterA - ((p->XfilterB * 31) >> 5);
123 p->buf[XADAPTCOEFFSB] = SIGN(p->buf[XDELAYB]);
124 p->XfilterB = p->YfilterA;
125 p->buf[XDELAYB-1] = p->buf[XDELAYB] - p->buf[XDELAYB-1];
126 p->buf[XADAPTCOEFFSB-1] = SIGN(p->buf[XDELAYB-1]);
127
128 predictionB = (p->buf[XDELAYB] * p->XcoeffsB[0]) +
129 (p->buf[XDELAYB-1] * p->XcoeffsB[1]) +
130 (p->buf[XDELAYB-2] * p->XcoeffsB[2]) +
131 (p->buf[XDELAYB-3] * p->XcoeffsB[3]) +
132 (p->buf[XDELAYB-4] * p->XcoeffsB[4]);
133
134 p->XlastA = *decoded1 + ((predictionA + (predictionB >> 1)) >> 10);
135 p->XfilterA = p->XlastA + ((p->XfilterA * 31) >> 5);
136
/* Adapt the Y coefficients by the stored sign values; the sign of the
   input residual (still in *decoded0) selects subtract or add, and a zero
   residual leaves the coefficients unchanged */
137 if (LIKELY(*decoded0 != 0))
138 {
139 if (*decoded0 > 0)
140 {
141 p->YcoeffsA[0] -= p->buf[YADAPTCOEFFSA];
142 p->YcoeffsA[1] -= p->buf[YADAPTCOEFFSA-1];
143 p->YcoeffsA[2] -= p->buf[YADAPTCOEFFSA-2];
144 p->YcoeffsA[3] -= p->buf[YADAPTCOEFFSA-3];
145
146 p->YcoeffsB[0] -= p->buf[YADAPTCOEFFSB];
147 p->YcoeffsB[1] -= p->buf[YADAPTCOEFFSB-1];
148 p->YcoeffsB[2] -= p->buf[YADAPTCOEFFSB-2];
149 p->YcoeffsB[3] -= p->buf[YADAPTCOEFFSB-3];
150 p->YcoeffsB[4] -= p->buf[YADAPTCOEFFSB-4];
151 }
152 else
153 {
154 p->YcoeffsA[0] += p->buf[YADAPTCOEFFSA];
155 p->YcoeffsA[1] += p->buf[YADAPTCOEFFSA-1];
156 p->YcoeffsA[2] += p->buf[YADAPTCOEFFSA-2];
157 p->YcoeffsA[3] += p->buf[YADAPTCOEFFSA-3];
158
159 p->YcoeffsB[0] += p->buf[YADAPTCOEFFSB];
160 p->YcoeffsB[1] += p->buf[YADAPTCOEFFSB-1];
161 p->YcoeffsB[2] += p->buf[YADAPTCOEFFSB-2];
162 p->YcoeffsB[3] += p->buf[YADAPTCOEFFSB-3];
163 p->YcoeffsB[4] += p->buf[YADAPTCOEFFSB-4];
164 }
165 }
166
/* The residual is no longer needed: replace it with the output value */
167 *(decoded0++) = p->YfilterA;
168
/* Same adaptation for the X coefficients, keyed on the residual in *decoded1 */
169 if (LIKELY(*decoded1 != 0))
170 {
171 if (*decoded1 > 0)
172 {
173 p->XcoeffsA[0] -= p->buf[XADAPTCOEFFSA];
174 p->XcoeffsA[1] -= p->buf[XADAPTCOEFFSA-1];
175 p->XcoeffsA[2] -= p->buf[XADAPTCOEFFSA-2];
176 p->XcoeffsA[3] -= p->buf[XADAPTCOEFFSA-3];
177
178 p->XcoeffsB[0] -= p->buf[XADAPTCOEFFSB];
179 p->XcoeffsB[1] -= p->buf[XADAPTCOEFFSB-1];
180 p->XcoeffsB[2] -= p->buf[XADAPTCOEFFSB-2];
181 p->XcoeffsB[3] -= p->buf[XADAPTCOEFFSB-3];
182 p->XcoeffsB[4] -= p->buf[XADAPTCOEFFSB-4];
183 }
184 else
185 {
186 p->XcoeffsA[0] += p->buf[XADAPTCOEFFSA];
187 p->XcoeffsA[1] += p->buf[XADAPTCOEFFSA-1];
188 p->XcoeffsA[2] += p->buf[XADAPTCOEFFSA-2];
189 p->XcoeffsA[3] += p->buf[XADAPTCOEFFSA-3];
190
191 p->XcoeffsB[0] += p->buf[XADAPTCOEFFSB];
192 p->XcoeffsB[1] += p->buf[XADAPTCOEFFSB-1];
193 p->XcoeffsB[2] += p->buf[XADAPTCOEFFSB-2];
194 p->XcoeffsB[3] += p->buf[XADAPTCOEFFSB-3];
195 p->XcoeffsB[4] += p->buf[XADAPTCOEFFSB-4];
196 }
197 }
198
199 *(decoded1++) = p->XfilterA;
200
201 /* Combined */
202 p->buf++;
203
204 /* Have we filled the history buffer? */
/* If so, copy the PREDICTOR_SIZE entries starting at p->buf back to the
   start of historybuffer and continue from there */
205 if (UNLIKELY(p->buf == p->historybuffer + PREDICTOR_HISTORY_SIZE)) {
206 memmove(p->historybuffer, p->buf,
207 PREDICTOR_SIZE * sizeof(int32_t));
208 p->buf = p->historybuffer;
209 }
210 }
211}
212
/* Portable C version of the single-channel predictor (the enclosing #if
   leaves it out when CPU_ARM or CPU_COLDFIRE is defined).

   Runs `count` iterations. Each iteration reads one residual from
   *decoded0, updates the Y filter-A state held in *p, and overwrites the
   input with the new p->YfilterA. Only the Y filter-A members of *p and
   p->buf are used.

   p        predictor state; p->buf advances by one entry per iteration
   decoded0 residuals in, p->YfilterA values out
   count    number of iterations to run */
213void ICODE_ATTR_DEMAC predictor_decode_mono(struct predictor_t* p,
214 int32_t* decoded0,
215 int count)
216{
217 int32_t predictionA, currentA, A;
218
/* The running "last A" value lives in a local for the duration of the
   loop and is stored back to p->YlastA once, after the loop */
219 currentA = p->YlastA;
220
221 while (LIKELY(count--))
222 {
223 A = *decoded0;
224
225 p->buf[YDELAYA] = currentA;
/* Slot [YDELAYA-1] is replaced in place by the difference to the new value */
226 p->buf[YDELAYA-1] = p->buf[YDELAYA] - p->buf[YDELAYA-1];
227
228 predictionA = (p->buf[YDELAYA] * p->YcoeffsA[0]) +
229 (p->buf[YDELAYA-1] * p->YcoeffsA[1]) +
230 (p->buf[YDELAYA-2] * p->YcoeffsA[2]) +
231 (p->buf[YDELAYA-3] * p->YcoeffsA[3]);
232
233 currentA = A + (predictionA >> 10);
234
235 p->buf[YADAPTCOEFFSA] = SIGN(p->buf[YDELAYA]);
236 p->buf[YADAPTCOEFFSA-1] = SIGN(p->buf[YDELAYA-1]);
237
/* Adapt the coefficients by the stored sign values; the sign of the
   residual A selects subtract or add, and A == 0 leaves them unchanged */
238 if (LIKELY(A != 0))
239 {
240 if (A > 0)
241 {
242 p->YcoeffsA[0] -= p->buf[YADAPTCOEFFSA];
243 p->YcoeffsA[1] -= p->buf[YADAPTCOEFFSA-1];
244 p->YcoeffsA[2] -= p->buf[YADAPTCOEFFSA-2];
245 p->YcoeffsA[3] -= p->buf[YADAPTCOEFFSA-3];
246 }
247 else
248 {
249 p->YcoeffsA[0] += p->buf[YADAPTCOEFFSA];
250 p->YcoeffsA[1] += p->buf[YADAPTCOEFFSA-1];
251 p->YcoeffsA[2] += p->buf[YADAPTCOEFFSA-2];
252 p->YcoeffsA[3] += p->buf[YADAPTCOEFFSA-3];
253 }
254 }
255
256 p->buf++;
257
258 /* Have we filled the history buffer? */
/* If so, copy the PREDICTOR_SIZE entries starting at p->buf back to the
   start of historybuffer and continue from there */
259 if (UNLIKELY(p->buf == p->historybuffer + PREDICTOR_HISTORY_SIZE)) {
260 memmove(p->historybuffer, p->buf,
261 PREDICTOR_SIZE * sizeof(int32_t));
262 p->buf = p->historybuffer;
263 }
264
/* The output filter does not touch p->buf, so it can run after the
   buffer handling above */
265 p->YfilterA = currentA + ((p->YfilterA * 31) >> 5);
266 *(decoded0++) = p->YfilterA;
267 }
268
269 p->YlastA = currentA;
270}
271#endif
diff --git a/lib/rbcodec/codecs/demac/libdemac/predictor.h b/lib/rbcodec/codecs/demac/libdemac/predictor.h
new file mode 100644
index 0000000000..6a0a81983b
--- /dev/null
+++ b/lib/rbcodec/codecs/demac/libdemac/predictor.h
@@ -0,0 +1,38 @@
1/*
2
3libdemac - A Monkey's Audio decoder
4
5$Id$
6
7Copyright (C) Dave Chapman 2007
8
9This program is free software; you can redistribute it and/or modify
10it under the terms of the GNU General Public License as published by
11the Free Software Foundation; either version 2 of the License, or
12(at your option) any later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
22
23*/
24
25#ifndef _APE_PREDICTOR_H
26#define _APE_PREDICTOR_H
27
28#include <inttypes.h>
29#include "parser.h"
30#include "filter.h"
31
/* Reset *p to its initial state: filter state cleared, initial
   coefficients loaded, p->buf rewound to the start of the history buffer. */
32void init_predictor_decoder(struct predictor_t* p);
/* Run the stereo predictor for count iterations. decoded0[] and decoded1[]
   hold the input residuals and are overwritten in place with the output. */
33void predictor_decode_stereo(struct predictor_t* p, int32_t* decoded0,
34 int32_t* decoded1, int count);
/* Single-channel variant: decoded0[] holds the input residuals and is
   overwritten in place with the output. */
35void predictor_decode_mono(struct predictor_t* p, int32_t* decoded0,
36 int count);
37
38#endif
diff --git a/lib/rbcodec/codecs/demac/libdemac/udiv32_arm-pre.S b/lib/rbcodec/codecs/demac/libdemac/udiv32_arm-pre.S
new file mode 100644
index 0000000000..459cab8240
--- /dev/null
+++ b/lib/rbcodec/codecs/demac/libdemac/udiv32_arm-pre.S
@@ -0,0 +1,25 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2010 by Andrew Mahone
11 *
12 * Wrapper for udiv32_arm.S to test available IRAM by pre-linking the codec.
13 *
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version 2
17 * of the License, or (at your option) any later version.
18 *
19 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
20 * KIND, either express or implied.
21 *
22 ****************************************************************************/
23
24#define APE_PRE
25#include "udiv32_arm.S"
diff --git a/lib/rbcodec/codecs/demac/libdemac/udiv32_arm.S b/lib/rbcodec/codecs/demac/libdemac/udiv32_arm.S
new file mode 100644
index 0000000000..7b851659bd
--- /dev/null
+++ b/lib/rbcodec/codecs/demac/libdemac/udiv32_arm.S
@@ -0,0 +1,318 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2008 by Jens Arnold
11 * Copyright (C) 2009 by Andrew Mahone
12 *
13 * Optimised unsigned integer division for ARMv4
14 *
15 * Based on: libgcc routines for ARM cpu, additional algorithms from ARM System
16 * Developer's Guide
17 * Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
18 * Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
19 * Free Software Foundation, Inc.
20 *
21 * This program is free software; you can redistribute it and/or
22 * modify it under the terms of the GNU General Public License
23 * as published by the Free Software Foundation; either version 2
24 * of the License, or (at your option) any later version.
25 *
26 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
27 * KIND, either express or implied.
28 *
29 ****************************************************************************/
30
31#include "config.h"
32/* On targets with codec iram, a header file will be generated after an initial
33 link of the APE codec, stating the amount of IRAM remaining for use by the
34 reciprocal lookup table. */
35#if !defined(APE_PRE) && defined(USE_IRAM) && ARM_ARCH < 5
36#include "lib/rbcodec/codecs/ape_free_iram.h"
37#endif
38
39/* Codecs should not normally do this, but we need to check a macro, and
40 * codecs.h would confuse the assembler. */
41
/* Section and symbol setup for udiv32_arm.
 * With USE_IRAM the code is emitted into the .icode section and DIV_RECIP is
 * defined; DIV_RECIP is only consulted by the ARM_ARCH < 5 implementation
 * below, where it enables the reciprocal-table fast path. Without USE_IRAM
 * the code goes into the ordinary .text section. */
42#ifdef USE_IRAM
43#define DIV_RECIP
44 .section .icode,"ax",%progbits
45#else
46 .text
47#endif
48 .align
/* Export udiv32_arm and mark it as a function symbol. */
49 .global udiv32_arm
50 .type udiv32_arm,%function
51
52#if ARM_ARCH < 5
53/* Adapted from an algorithm given in ARM System Developer's Guide (7.3.1.2)
54 for dividing a 30-bit value by a 15-bit value, with two operations per
55 iteration by storing quotient and remainder together and adding the previous
56 quotient bit during trial subtraction. Modified to work with any dividend
57 and divisor both less than 1 << 30, and skipping trials by calculating bits
58 in output. */
/* ARM_DIV_31_BODY: shift-and-subtract division core.
 *
 * Macro arguments are registers:
 *   dividend  - numerator
 *   divisor   - divisor; every comparison below is an addition (cmn/adds/adcs),
 *               so the caller is expected to pass the NEGATED divisor
 *               (udiv32_arm does this with rsbs before invoking the macro)
 *   result    - scratch holding remainder and quotient bits together
 *   bits      - scratch, number of quotient bits that have to be produced
 *   curbit    - scratch, number of unrolled trial steps to skip
 *   quotient  - receives the quotient
 *   remainder - receives the remainder
 * The invocation in this file passes the same registers for dividend/quotient
 * and for divisor/remainder. */
59.macro ARM_DIV_31_BODY dividend, divisor, result, bits, curbit, quotient, remainder
60
61 mov \bits, #1
62 /* Shift the divisor left until it aligns with the numerator. If it already
63 has the high bit set, this is fine, everything inside .rept will be
64 skipped, and the add before and adcs after will set the one-bit result
65 to zero. */
/* Binary search for the alignment: each step tests whether the dividend
   shifted right by 16/8/4/2/1 is still at least as large as the divisor and,
   if so, shifts the divisor left by that amount and adds it to bits. */
66 cmn \divisor, \dividend, lsr #16
67 movcs \divisor, \divisor, lsl #16
68 addcs \bits, \bits, #16
69 cmn \divisor, \dividend, lsr #8
70 movcs \divisor, \divisor, lsl #8
71 addcs \bits, \bits, #8
72 cmn \divisor, \dividend, lsr #4
73 movcs \divisor, \divisor, lsl #4
74 addcs \bits, \bits, #4
75 cmn \divisor, \dividend, lsr #2
76 movcs \divisor, \divisor, lsl #2
77 addcs \bits, \bits, #2
78 cmn \divisor, \dividend, lsr #1
79 movcs \divisor, \divisor, lsl #1
80 addcs \bits, \bits, #1
/* First trial subtraction; undone again if it produced no carry. The carry
   flag is left holding the first quotient bit. */
81 adds \result, \dividend, \divisor
82 subcc \result, \result, \divisor
/* Computed jump into the unrolled loop: skip (31 - bits) of the 30 steps.
   Every step is two instructions (8 bytes), hence the lsl #3. Reading pc in
   ARM state yields the address of the current instruction plus 8, so an
   offset of zero lands on the first instruction after the nop. */
83 rsb \curbit, \bits, #31
84 add pc, pc, \curbit, lsl #3
85 nop
86 .rept 30
87 adcs \result, \divisor, \result, lsl #1
88 /* Fix the remainder portion of the result. This must be done because the
89 handler for 32-bit numerators needs the remainder. */
90 subcc \result, \result, \divisor
91 .endr
92 /* Shift remainder/quotient left one, add final quotient bit */
93 adc \result, \result, \result
/* result now holds the remainder above the low `bits` quotient bits:
   split it into the two outputs. */
94 mov \remainder, \result, lsr \bits
95 eor \quotient, \result, \remainder, lsl \bits
96.endm
97
/* recip_max is the largest divisor served by the reciprocal table of the
   DIV_RECIP fast path. Without FREE_IRAM it is 2, which makes the table
   empty (the table is generated with .rept recip_max - 2). */
98#ifndef FREE_IRAM
99.set recip_max, 2
100#else
101/* Each table entry is one word. Since a compare is done against the maximum
102 entry as an immediate, the maximum entry must be a valid ARM immediate,
103 which means a byte shifted by an even number of places. */
104.set recip_max, 2 + FREE_IRAM / 4
/* Binary search (shift steps 16, 8, 4, 2, 1) for the position of the highest
   set bit of recip_max >> 8. Together with the .if that follows the loop,
   recip_mask_shift ends up holding the number of significant bits in
   recip_max >> 8. */
105.set recip_max_tmp, recip_max >> 8
106.set recip_mask_shift, 0
107.set tmp_shift, 16
108.rept 5
109 .if recip_max_tmp >> tmp_shift
110 .set recip_max_tmp, recip_max_tmp >> tmp_shift
111 .set recip_mask_shift, recip_mask_shift + tmp_shift
112 .endif
113 .set tmp_shift, tmp_shift >> 1
114.endr
115.if recip_max_tmp
116 .set recip_mask_shift, recip_mask_shift + 1
117.endif
/* Round the shift up to an even value, then keep only the eight bits of
   recip_max that sit at that shift (this rounds recip_max down). */
118.set recip_mask_shift, (recip_mask_shift + 1) & 62
119.set recip_max, recip_max & (255 << recip_mask_shift)
120//.set recip_max, 2
121#endif
122
/* udiv32_arm, ARM_ARCH < 5 implementation.
 * In:  r0 = numerator, r1 = divisor.
 * Out: r0 = quotient (the general path also leaves the remainder in r1).
 * Uses r2, r3 and ip as scratch.
 * A zero divisor ends up at .L_div0, which calls __div0. */
123udiv32_arm:
124#ifdef DIV_RECIP
/* Fast path: divisors 3..recip_max are handled by multiplying with a
   precomputed reciprocal. Divisors 0..2 go to .L_udiv_tiny, larger ones to
   the general routine at .L_udiv. */
125 cmp r1, #3
126 bcc .L_udiv_tiny
127 cmp r1, #recip_max
128 bhi .L_udiv
/* The first table entry belongs to divisor 3, so the base address is biased
   by three words (12 bytes) to allow indexing with the divisor itself. */
129 adr r3, .L_udiv_recip_table-12
130 ldr r2, [r3, r1, lsl #2]
131 mov r3, r0
/* r0 = high word of reciprocal * numerator = quotient estimate. */
132 umull ip, r0, r2, r0
/* Multiply back; if the product exceeds the numerator the estimate was one
   too large. */
133 mul r2, r0, r1
134 cmp r3, r2
135 bxcs lr
136 sub r0, r0, #1
137 bx lr
138.L_udiv_tiny:
/* Divisor is 0, 1 or 2: 2 -> shift right by one, 1 -> return the numerator
   unchanged, 0 -> division by zero. */
139 cmp r1, #1
140 movhi r0, r0, lsr #1
141 bxcs lr
142 b .L_div0
143#endif
144.L_udiv:
145 /* Invert divisor. ARM_DIV_31_BODY uses adc to both subtract the divisor
146 and add the next bit of the result. The correction code at .L_udiv32
147 does not need the divisor inverted, but can be modified to work with it,
148 and this allows the zero divisor test to be done early and without an
149 explicit comparison. */
150 rsbs r1, r1, #0
151#ifndef DIV_RECIP
152 beq .L_div0
153#endif
154 tst r0, r0
155 /* High bit must be unset, otherwise shift numerator right, calculate,
156 and correct results. As this case is very uncommon we want to avoid
157 any other delays on the main path in handling it, so the long divide
158 calls the short divide as a function. */
159 bmi .L_udiv32
160.L_udiv31:
161 ARM_DIV_31_BODY r0, r1, r2, r3, ip, r0, r1
162 bx lr
163.L_udiv32:
164 /* store original numerator and divisor, we'll need them to correct the
165 result. */
/* NOTE(review): stmdb without writeback stores below sp without moving sp;
   this relies on nothing else using that stack area until the ldmdb below --
   confirm. r1 has already been negated at this point, so the negated divisor
   is what gets saved. */
166 stmdb sp, { r0, r1, lr }
167 /* Call __div0 here if divisor is zero, otherwise it would report the wrong
168 address. */
/* NOTE(review): no zero test is visible at this point; the zero divisor
   appears to be caught earlier (beq after the rsbs, or .L_udiv_tiny when
   DIV_RECIP is defined). The comment above may be stale -- confirm. */
/* Halve the numerator so that its top bit is clear, then run the 31-bit
   divide as a subroutine. */
169 mov r0, r0, lsr #1
170 bl .L_udiv31
/* r2 = original numerator, r3 = saved (negated) divisor. */
171 ldmdb sp, { r2, r3, lr }
172 /* Move the low bit of the original numerator to the carry bit */
173 movs r2, r2, lsr #1
174 /* Shift the remainder left one and add in the carry bit */
175 adc r1, r1, r1
176 /* Subtract the original divisor from the remainder, setting carry if the
177 result is non-negative */
178 adds r1, r1, r3
179 /* Shift quotient left one and add carry bit */
180 adc r0, r0, r0
181 bx lr
182.L_div0:
183 /* __div0 expects the calling address on the top of the stack */
184 stmdb sp!, { lr }
185 mov r0, #0
/* NOTE(review): nothing follows the call to __div0, so it presumably never
   returns -- confirm. In the second variant the ldr loads the literal word
   placed right behind it into pc, i.e. an absolute jump to __div0. */
186#if defined(__ARM_EABI__) || !defined(USE_IRAM)
187 bl __div0
188#else
189 ldr pc, [pc, #-4]
190 .word __div0
191#endif
192#ifdef DIV_RECIP
/* Reciprocal table for divisors 3..recip_max, generated at assembly time.
   For a divisor that is not a power of two the entry is
   floor(2^32 / div) + 1: the quotient of 2^30 / div is extended by two more
   long-division steps and then incremented. For a power of two the entry is
   exactly 2^32 / div. */
193.L_udiv_recip_table:
194 .set div, 3
195 .rept recip_max - 2
196 .if (div - 1) & div
197 .set q, 0x40000000 / div
198 .set r, (0x40000000 - (q * div))<<1
199 .set q, q << 1
200 .if r >= div
201 .set q, q + 1
202 .set r, r - div
203 .endif
204 .set r, r << 1
205 .set q, q << 1
206 .if r >= div
207 .set q, q + 1
208 .set r, r - div
209 .endif
210 .set q, q + 1
211 .else
212 .set q, 0x40000000 / div * 4
213 .endif
214 .word q
215 .set div, div+1
216 .endr
217#endif
/* NOTE(review): a second .size directive follows the closing #endif of the
   ARM_ARCH conditional, so this variant emits .size twice. */
218 .size udiv32_arm, . - udiv32_arm
219
220#else
/* ARMV5_UDIV32_BODY: unsigned 32-bit division for ARMv5 and later.
 *
 * Macro arguments are registers, except div0label:
 *   numerator, divisor - operands (both are overwritten)
 *   quotient           - receives the result
 *   bits, inv, neg     - scratch
 *   div0label          - label to branch to when the divisor is zero
 * Every path through the macro ends in "bx lr". The numeric labels 10, 20,
 * 30 and 40 are local labels.
 *
 * NOTE(review): the main path appears to refine the table estimate of the
 * reciprocal by multiply/accumulate steps (Newton-Raphson style) and then
 * corrects the quotient estimate; the arithmetic has not been re-derived
 * here. */
221.macro ARMV5_UDIV32_BODY numerator, divisor, quotient, bits, inv, neg, div0label
222 cmp \numerator, \divisor
223 clz \bits, \divisor
/* numerator < divisor: the quotient is zero. */
224 bcc 30f
/* Normalise the divisor so that its highest set bit is bit 31. */
225 mov \inv, \divisor, lsl \bits
226 add \neg, pc, \inv, lsr #25
/* inv == 1<<31 means the divisor is a power of two, inv == 0 means it is
   zero; both are handled at 20. Otherwise inv >> 25 lies in 65..127 and the
   -64 bias turns it into an index into the 64-byte table .L_udiv_est_table. */
227 cmp \inv, #1<<31
228 ldrhib \inv, [\neg, #.L_udiv_est_table-.-64]
229 bls 20f
230 subs \bits, \bits, #7
231 rsb \neg, \divisor, #0
232 movpl \divisor, \inv, lsl \bits
/* clz < 7, i.e. a large divisor: use the shorter path at 10. */
233 bmi 10f
234 mul \inv, \divisor, \neg
235 smlawt \divisor, \divisor, \inv, \divisor
236 mul \inv, \divisor, \neg
237 /* This will save a cycle on ARMv6, but requires that the numerator sign
238 bit is not set (that of inv is guaranteed unset). The branch should
239 predict very well, making it typically 1 cycle, and thus both the branch
240 and test fill delay cycles for the multiplies. Based on logging of
241 numerator sizes in the APE codec, the branch is taken about 1/10^7 of
242 the time. */
243#if ARM_ARCH >= 6
244 tst \numerator, \numerator
245 smmla \divisor, \divisor, \inv, \divisor
246 bmi 40f
247 smmul \inv, \numerator, \divisor
248#else
249 mov \bits, #0
250 smlal \bits, \divisor, \inv, \divisor
251 umull \bits, \inv, \numerator, \divisor
252#endif
/* inv holds the quotient estimate; compute numerator - inv * divisor and
   adjust the quotient according to the flags of the final comparison. */
253 add \numerator, \numerator, \neg
254 mla \divisor, \inv, \neg, \numerator
255 mov \quotient, \inv
256 cmn \divisor, \neg
257 addcc \quotient, \quotient, #1
258 addpl \quotient, \quotient, #2
259 bx lr
26010:
/* Large-divisor path: bits is negative here, so the table estimate is shifted
   right instead of left and used without refinement; up to three units of
   correction are applied to the quotient afterwards. */
261 rsb \bits, \bits, #0
262 sub \inv, \inv, #4
263 mov \divisor, \inv, lsr \bits
264 umull \bits, \inv, \numerator, \divisor
265 mla \divisor, \inv, \neg, \numerator
266 mov \quotient, \inv
267 cmn \neg, \divisor, lsr #1
268 addcs \divisor, \divisor, \neg, lsl #1
269 addcs \quotient, \quotient, #2
270 cmn \neg, \divisor
271 addcs \quotient, \quotient, #1
272 bx lr
27320:
/* Divisor is a power of two or zero. The flags still come from
   "cmp inv, #1<<31" (rsb does not set flags): not-equal means inv was 0,
   i.e. the divisor was zero. Otherwise the quotient is the numerator shifted
   right by 31 - clz(divisor).
   NOTE(review): the rsb that converts clz into the shift count is inside the
   .ifnc, so an invocation without div0label would shift by clz(divisor)
   instead. The only invocation in this file passes a label. */
274.ifnc "", "\div0label"
275 rsb \bits, \bits, #31
276 bne \div0label
277.endif
278 mov \quotient, \numerator, lsr \bits
279 bx lr
28030:
281 mov \quotient, #0
282 bx lr
283#if ARM_ARCH >= 6
28440:
/* ARMv6 only: numerator has its top bit set, so the signed smmul of the main
   path cannot be used; do the estimate with umull instead. The correction
   sequence is the same as on the main path. */
285 umull \bits, \inv, \numerator, \divisor
286 add \numerator, \numerator, \neg
287 mla \divisor, \inv, \neg, \numerator
288 mov \quotient, \inv
289 cmn \divisor, \neg
290 addcc \quotient, \quotient, #1
291 addpl \quotient, \quotient, #2
292 bx lr
293#endif
294.endm
295
/* udiv32_arm, ARM_ARCH >= 5 implementation.
 * In:  r0 = numerator, r1 = divisor.
 * Out: r0 = quotient.
 * Uses r2, r3 and ip as scratch. Every path of the macro returns with
 * "bx lr", so .L_div0 below is only reached through the macro's branch for a
 * zero divisor. */
296udiv32_arm:
297 ARMV5_UDIV32_BODY r0, r1, r0, r2, r3, ip, .L_div0
298.L_div0:
299 /* __div0 expects the calling address on the top of the stack */
300 stmdb sp!, { lr }
301 mov r0, #0
/* NOTE(review): nothing but table data follows the call to __div0, so it
   presumably never returns -- confirm. In the second variant the ldr loads
   the literal word placed right behind it into pc, i.e. an absolute jump. */
302#if defined(__ARM_EABI__) || !defined(USE_IRAM)
303 bl __div0
304#else
305 ldr pc, [pc, #-4]
306 .word __div0
307#endif
/* 64 one-byte initial estimates, indexed by the macro with
   (normalised divisor >> 25) - 64. */
308.L_udiv_est_table:
309 .byte 0xff, 0xfc, 0xf8, 0xf4, 0xf0, 0xed, 0xea, 0xe6
310 .byte 0xe3, 0xe0, 0xdd, 0xda, 0xd7, 0xd4, 0xd2, 0xcf
311 .byte 0xcc, 0xca, 0xc7, 0xc5, 0xc3, 0xc0, 0xbe, 0xbc
312 .byte 0xba, 0xb8, 0xb6, 0xb4, 0xb2, 0xb0, 0xae, 0xac
313 .byte 0xaa, 0xa8, 0xa7, 0xa5, 0xa3, 0xa2, 0xa0, 0x9f
314 .byte 0x9d, 0x9c, 0x9a, 0x99, 0x97, 0x96, 0x94, 0x93
315 .byte 0x92, 0x90, 0x8f, 0x8e, 0x8d, 0x8c, 0x8a, 0x89
316 .byte 0x88, 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81
317#endif
318 .size udiv32_arm, . - udiv32_arm
diff --git a/lib/rbcodec/codecs/demac/libdemac/vector_math16_armv5te.h b/lib/rbcodec/codecs/demac/libdemac/vector_math16_armv5te.h
new file mode 100644
index 0000000000..ae7427c137
--- /dev/null
+++ b/lib/rbcodec/codecs/demac/libdemac/vector_math16_armv5te.h
@@ -0,0 +1,404 @@
1/*
2
3libdemac - A Monkey's Audio decoder
4
5$Id$
6
7Copyright (C) Dave Chapman 2007
8
9ARMv5te vector math copyright (C) 2008 Jens Arnold
10
11This program is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2 of the License, or
14(at your option) any later version.
15
16This program is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
20
21You should have received a copy of the GNU General Public License
22along with this program; if not, write to the Free Software
23Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
24
25*/
26
/* NOTE(review): FUSED_VECTOR_MATH presumably tells the including code that
 * this header provides the fused vector_sp_add()/vector_sp_sub() -- confirm
 * where the macro is tested. */
27#define FUSED_VECTOR_MATH
28
/* Text-repetition helpers used to unroll the inline assembly below.
 * REPEAT_3 pastes its argument three times. REPEAT_MLA pastes it seven times
 * when ORDER > 16 and three times otherwise; scalarproduct() combines it with
 * one leading block, giving 8 or 4 blocks of four samples per pass. */
29#define REPEAT_3(x) x x x
30#if ORDER > 16
31#define REPEAT_MLA(x) x x x x x x x
32#else
33#define REPEAT_MLA(x) x x x
34#endif
35
36/* Calculate scalarproduct, then add a 2nd vector (fused for performance)
37 * This version fetches data as 32 bit words, and *requires* v1 to be
38 * 32 bit aligned. It also requires that f2 and s2 are either both 32 bit
39 * aligned or both unaligned. If either condition isn't met, it will either
40 * result in a data abort or incorrect results. */
/* ARMv5TE implementation.
 * Returns the sum of v1[i] * f2[i] and, in the same pass, updates v1 in place
 * with v1[i] += s2[i]. The products use the v1 values from before the update.
 * One pass of the assembly handles 16 samples; with ORDER > 16 it is repeated
 * ORDER>>4 times, otherwise it runs once.
 * f2 and s2 are read only; all three pointers are advanced inside the asm
 * (on local copies). Registers r0-r5 are used as scratch. */
41static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
42{
43 int res;
44#if ORDER > 16
45 int cnt = ORDER>>4;
46#endif
47
48#define ADDHALFREGS(sum, s1, s2) /* Adds register */ \
49 "mov " #s1 ", " #s1 ", ror #16 \n" /* halves straight */ \
50 "add " #sum ", " #s1 ", " #s2 ", lsl #16 \n" /* Clobbers 's1' */ \
51 "add " #s1 ", " #s1 ", " #s2 ", lsr #16 \n" \
52 "mov " #s1 ", " #s1 ", lsl #16 \n" \
53 "orr " #sum ", " #s1 ", " #sum ", lsr #16 \n"
54
55#define ADDHALFXREGS(sum, s1, s2) /* Adds register */ \
56 "add " #s1 ", " #s1 ", " #sum ", lsl #16 \n" /* halves across. */ \
57 "add " #sum ", " #s2 ", " #sum ", lsr #16 \n" /* Clobbers 's1'. */ \
58 "mov " #sum ", " #sum ", lsl #16 \n" \
59 "orr " #sum ", " #sum ", " #s1 ", lsr #16 \n"
60
61 asm volatile (
/* With ORDER > 16 the accumulator is cleared and every product is
   accumulated; otherwise the first product initialises it (smulbb). */
62#if ORDER > 16
63 "mov %[res], #0 \n"
64#endif
/* Select the code path from bit 1 of the f2 address. */
65 "tst %[f2], #2 \n"
66 "beq 20f \n"
67
/* 10: f2 is not word aligned. Consume one leading halfword of s2 and f2 so
   that the following ldmia accesses are word aligned; products and additions
   then combine halves of neighbouring words (smlabt/smlatb, ADDHALFXREGS). */
68 "10: \n"
69 "ldrh r4, [%[s2]], #2 \n"
70 "mov r4, r4, lsl #16 \n"
71 "ldrh r3, [%[f2]], #2 \n"
72#if ORDER > 16
73 "mov r3, r3, lsl #16 \n"
74 "1: \n"
75 "ldmia %[v1], {r0,r1} \n"
76 "smlabt %[res], r0, r3, %[res] \n"
77#else
78 "ldmia %[v1], {r0,r1} \n"
79 "smulbb %[res], r0, r3 \n"
80#endif
81 "ldmia %[f2]!, {r2,r3} \n"
82 "smlatb %[res], r0, r2, %[res] \n"
83 "smlabt %[res], r1, r2, %[res] \n"
84 "smlatb %[res], r1, r3, %[res] \n"
85 "ldmia %[s2]!, {r2,r5} \n"
86 ADDHALFXREGS(r0, r4, r2)
87 ADDHALFXREGS(r1, r2, r5)
88 "stmia %[v1]!, {r0,r1} \n"
/* The remaining three blocks of four samples alternate r4 and r5 as the
   register carrying the s2 halfword over from the previous block. */
89 "ldmia %[v1], {r0,r1} \n"
90 "smlabt %[res], r0, r3, %[res] \n"
91 "ldmia %[f2]!, {r2,r3} \n"
92 "smlatb %[res], r0, r2, %[res] \n"
93 "smlabt %[res], r1, r2, %[res] \n"
94 "smlatb %[res], r1, r3, %[res] \n"
95 "ldmia %[s2]!, {r2,r4} \n"
96 ADDHALFXREGS(r0, r5, r2)
97 ADDHALFXREGS(r1, r2, r4)
98 "stmia %[v1]!, {r0,r1} \n"
99
100 "ldmia %[v1], {r0,r1} \n"
101 "smlabt %[res], r0, r3, %[res] \n"
102 "ldmia %[f2]!, {r2,r3} \n"
103 "smlatb %[res], r0, r2, %[res] \n"
104 "smlabt %[res], r1, r2, %[res] \n"
105 "smlatb %[res], r1, r3, %[res] \n"
106 "ldmia %[s2]!, {r2,r5} \n"
107 ADDHALFXREGS(r0, r4, r2)
108 ADDHALFXREGS(r1, r2, r5)
109 "stmia %[v1]!, {r0,r1} \n"
110 "ldmia %[v1], {r0,r1} \n"
111 "smlabt %[res], r0, r3, %[res] \n"
112 "ldmia %[f2]!, {r2,r3} \n"
113 "smlatb %[res], r0, r2, %[res] \n"
114 "smlabt %[res], r1, r2, %[res] \n"
115 "smlatb %[res], r1, r3, %[res] \n"
116 "ldmia %[s2]!, {r2,r4} \n"
117 ADDHALFXREGS(r0, r5, r2)
118 ADDHALFXREGS(r1, r2, r4)
119 "stmia %[v1]!, {r0,r1} \n"
120#if ORDER > 16
121 "subs %[cnt], %[cnt], #1 \n"
122 "bne 1b \n"
123#endif
124 "b 99f \n"
125
/* 20: f2 is word aligned. Whole words of v1, f2 and s2 line up, so the
   products pair equal halves (smlabb/smlatt) and ADDHALFREGS adds the
   halfwords lane by lane. */
126 "20: \n"
127 "1: \n"
128 "ldmia %[v1], {r1,r2} \n"
129 "ldmia %[f2]!, {r3,r4} \n"
130#if ORDER > 16
131 "smlabb %[res], r1, r3, %[res] \n"
132#else
133 "smulbb %[res], r1, r3 \n"
134#endif
135 "smlatt %[res], r1, r3, %[res] \n"
136 "smlabb %[res], r2, r4, %[res] \n"
137 "smlatt %[res], r2, r4, %[res] \n"
138 "ldmia %[s2]!, {r3,r4} \n"
139 ADDHALFREGS(r0, r1, r3)
140 ADDHALFREGS(r1, r2, r4)
141 "stmia %[v1]!, {r0,r1} \n"
142
143 REPEAT_3(
144 "ldmia %[v1], {r1,r2} \n"
145 "ldmia %[f2]!, {r3,r4} \n"
146 "smlabb %[res], r1, r3, %[res] \n"
147 "smlatt %[res], r1, r3, %[res] \n"
148 "smlabb %[res], r2, r4, %[res] \n"
149 "smlatt %[res], r2, r4, %[res] \n"
150 "ldmia %[s2]!, {r3,r4} \n"
151 ADDHALFREGS(r0, r1, r3)
152 ADDHALFREGS(r1, r2, r4)
153 "stmia %[v1]!, {r0,r1} \n"
154 )
155#if ORDER > 16
156 "subs %[cnt], %[cnt], #1 \n"
157 "bne 1b \n"
158#endif
159
160 "99: \n"
161 : /* outputs */
162#if ORDER > 16
163 [cnt]"+r"(cnt),
164#endif
165 [v1] "+r"(v1),
166 [f2] "+r"(f2),
167 [s2] "+r"(s2),
168 [res]"=r"(res)
169 : /* inputs */
170 : /* clobbers */
171 "r0", "r1", "r2", "r3", "r4", "r5", "cc", "memory"
172 );
173 return res;
174}
175
176/* Calculate scalarproduct, then subtract a 2nd vector (fused for performance)
177 * This version fetches data as 32 bit words, and *requires* v1 to be
178 * 32 bit aligned. It also requires that f2 and s2 are either both 32 bit
179 * aligned or both unaligned. If either condition isn't met, it will either
180 * result in a data abort or incorrect results. */
/* ARMv5TE implementation.
 * Returns the sum of v1[i] * f2[i] and, in the same pass, updates v1 in place
 * with v1[i] -= s2[i]. The products use the v1 values from before the update.
 * One pass of the assembly handles 16 samples; with ORDER > 16 it is repeated
 * ORDER>>4 times, otherwise it runs once.
 * f2 and s2 are read only; all three pointers are advanced inside the asm
 * (on local copies). Registers r0-r6 are used as scratch. */
181static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
182{
183 int res;
184#if ORDER > 16
185 int cnt = ORDER>>4;
186#endif
187
188#define SUBHALFREGS(dif, s1, s2) /* Subtracts reg. */ \
189 "mov " #s1 ", " #s1 ", ror #16 \n" /* halves straight */ \
190 "sub " #dif ", " #s1 ", " #s2 ", lsl #16 \n" /* Clobbers 's1' */ \
191 "sub " #s1 ", " #s1 ", " #s2 ", lsr #16 \n" \
192 "mov " #s1 ", " #s1 ", lsl #16 \n" \
193 "orr " #dif ", " #s1 ", " #dif ", lsr #16 \n"
194
195#define SUBHALFXREGS(dif, s1, s2, msk) /* Subtracts reg. */ \
196 "sub " #s1 ", " #dif ", " #s1 ", lsr #16 \n" /* halves across. */ \
197 "and " #s1 ", " #s1 ", " #msk " \n" /* Needs msk = */ \
198 "rsb " #dif ", " #s2 ", " #dif ", lsr #16 \n" /* 0x0000ffff, */ \
199 "orr " #dif ", " #s1 ", " #dif ", lsl #16 \n" /* clobbers 's1'. */
200
201 asm volatile (
/* With ORDER > 16 the accumulator is cleared and every product is
   accumulated; otherwise the first product initialises it (smulbb). */
202#if ORDER > 16
203 "mov %[res], #0 \n"
204#endif
/* Select the code path from bit 1 of the f2 address. */
205 "tst %[f2], #2 \n"
206 "beq 20f \n"
207
/* 10: f2 is not word aligned. r6 is set to 0x0000ffff, the mask required by
   SUBHALFXREGS. One leading halfword of s2 and f2 is consumed so that the
   following ldmia accesses are word aligned. */
208 "10: \n"
209 "mov r6, #0xff \n"
210 "orr r6, r6, #0xff00 \n"
211 "ldrh r4, [%[s2]], #2 \n"
212 "mov r4, r4, lsl #16 \n"
213 "ldrh r3, [%[f2]], #2 \n"
214#if ORDER > 16
215 "mov r3, r3, lsl #16 \n"
216 "1: \n"
217 "ldmia %[v1], {r0,r1} \n"
218 "smlabt %[res], r0, r3, %[res] \n"
219#else
220 "ldmia %[v1], {r0,r1} \n"
221 "smulbb %[res], r0, r3 \n"
222#endif
223 "ldmia %[f2]!, {r2,r3} \n"
224 "smlatb %[res], r0, r2, %[res] \n"
225 "smlabt %[res], r1, r2, %[res] \n"
226 "smlatb %[res], r1, r3, %[res] \n"
227 "ldmia %[s2]!, {r2,r5} \n"
228 SUBHALFXREGS(r0, r4, r2, r6)
229 SUBHALFXREGS(r1, r2, r5, r6)
230 "stmia %[v1]!, {r0,r1} \n"
/* The remaining three blocks of four samples alternate r4 and r5 as the
   register carrying the s2 halfword over from the previous block. */
231 "ldmia %[v1], {r0,r1} \n"
232 "smlabt %[res], r0, r3, %[res] \n"
233 "ldmia %[f2]!, {r2,r3} \n"
234 "smlatb %[res], r0, r2, %[res] \n"
235 "smlabt %[res], r1, r2, %[res] \n"
236 "smlatb %[res], r1, r3, %[res] \n"
237 "ldmia %[s2]!, {r2,r4} \n"
238 SUBHALFXREGS(r0, r5, r2, r6)
239 SUBHALFXREGS(r1, r2, r4, r6)
240 "stmia %[v1]!, {r0,r1} \n"
241
242 "ldmia %[v1], {r0,r1} \n"
243 "smlabt %[res], r0, r3, %[res] \n"
244 "ldmia %[f2]!, {r2,r3} \n"
245 "smlatb %[res], r0, r2, %[res] \n"
246 "smlabt %[res], r1, r2, %[res] \n"
247 "smlatb %[res], r1, r3, %[res] \n"
248 "ldmia %[s2]!, {r2,r5} \n"
249 SUBHALFXREGS(r0, r4, r2, r6)
250 SUBHALFXREGS(r1, r2, r5, r6)
251 "stmia %[v1]!, {r0,r1} \n"
252 "ldmia %[v1], {r0,r1} \n"
253 "smlabt %[res], r0, r3, %[res] \n"
254 "ldmia %[f2]!, {r2,r3} \n"
255 "smlatb %[res], r0, r2, %[res] \n"
256 "smlabt %[res], r1, r2, %[res] \n"
257 "smlatb %[res], r1, r3, %[res] \n"
258 "ldmia %[s2]!, {r2,r4} \n"
259 SUBHALFXREGS(r0, r5, r2, r6)
260 SUBHALFXREGS(r1, r2, r4, r6)
261 "stmia %[v1]!, {r0,r1} \n"
262#if ORDER > 16
263 "subs %[cnt], %[cnt], #1 \n"
264 "bne 1b \n"
265#endif
266 "b 99f \n"
267
/* 20: f2 is word aligned. Whole words of v1, f2 and s2 line up, so the
   products pair equal halves (smlabb/smlatt) and SUBHALFREGS subtracts the
   halfwords lane by lane. */
268 "20: \n"
269 "1: \n"
270 "ldmia %[v1], {r1,r2} \n"
271 "ldmia %[f2]!, {r3,r4} \n"
272#if ORDER > 16
273 "smlabb %[res], r1, r3, %[res] \n"
274#else
275 "smulbb %[res], r1, r3 \n"
276#endif
277 "smlatt %[res], r1, r3, %[res] \n"
278 "smlabb %[res], r2, r4, %[res] \n"
279 "smlatt %[res], r2, r4, %[res] \n"
280 "ldmia %[s2]!, {r3,r4} \n"
281 SUBHALFREGS(r0, r1, r3)
282 SUBHALFREGS(r1, r2, r4)
283 "stmia %[v1]!, {r0,r1} \n"
284
285 REPEAT_3(
286 "ldmia %[v1], {r1,r2} \n"
287 "ldmia %[f2]!, {r3,r4} \n"
288 "smlabb %[res], r1, r3, %[res] \n"
289 "smlatt %[res], r1, r3, %[res] \n"
290 "smlabb %[res], r2, r4, %[res] \n"
291 "smlatt %[res], r2, r4, %[res] \n"
292 "ldmia %[s2]!, {r3,r4} \n"
293 SUBHALFREGS(r0, r1, r3)
294 SUBHALFREGS(r1, r2, r4)
295 "stmia %[v1]!, {r0,r1} \n"
296 )
297#if ORDER > 16
298 "subs %[cnt], %[cnt], #1 \n"
299 "bne 1b \n"
300#endif
301
302 "99: \n"
303 : /* outputs */
304#if ORDER > 16
305 [cnt]"+r"(cnt),
306#endif
307 [v1] "+r"(v1),
308 [f2] "+r"(f2),
309 [s2] "+r"(s2),
310 [res]"=r"(res)
311 : /* inputs */
312 : /* clobbers */
313 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "cc", "memory"
314 );
315 return res;
316}
317
318/* This version fetches data as 32 bit words, and *requires* v1 to be
319 * 32 bit aligned, otherwise it will result either in a data abort, or
320 * incorrect results (if ARM aligncheck is disabled). */
/* ARMv5TE implementation.
 * Returns the sum of v1[i] * v2[i]; neither vector is written.
 * One pass consists of one leading block plus REPEAT_MLA further blocks of
 * four samples each (32 samples when ORDER > 16, otherwise 16). With
 * ORDER > 32 the pass is repeated ORDER>>5 times.
 * Registers r0-r3 are used as scratch. */
321static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
322{
323 int res;
324#if ORDER > 32
325 int cnt = ORDER>>5;
326#endif
327
328 asm volatile (
/* With ORDER > 32 the accumulator is cleared and every product is
   accumulated; otherwise the first product initialises it (smulbb). */
329#if ORDER > 32
330 "mov %[res], #0 \n"
331#endif
/* Select the code path from bit 1 of the v2 address. */
332 "tst %[v2], #2 \n"
333 "beq 20f \n"
334
/* 10: v2 is not word aligned. One leading halfword of v2 is consumed so that
   the following ldmia accesses are word aligned; the products then combine
   halves of neighbouring v2 words (smlabt/smlatb). */
335 "10: \n"
336 "ldrh r3, [%[v2]], #2 \n"
337#if ORDER > 32
338 "mov r3, r3, lsl #16 \n"
339 "1: \n"
340 "ldmia %[v1]!, {r0,r1} \n"
341 "smlabt %[res], r0, r3, %[res] \n"
342#else
343 "ldmia %[v1]!, {r0,r1} \n"
344 "smulbb %[res], r0, r3 \n"
345#endif
346 "ldmia %[v2]!, {r2,r3} \n"
347 "smlatb %[res], r0, r2, %[res] \n"
348 "smlabt %[res], r1, r2, %[res] \n"
349 "smlatb %[res], r1, r3, %[res] \n"
350
351 REPEAT_MLA(
352 "ldmia %[v1]!, {r0,r1} \n"
353 "smlabt %[res], r0, r3, %[res] \n"
354 "ldmia %[v2]!, {r2,r3} \n"
355 "smlatb %[res], r0, r2, %[res] \n"
356 "smlabt %[res], r1, r2, %[res] \n"
357 "smlatb %[res], r1, r3, %[res] \n"
358 )
359#if ORDER > 32
360 "subs %[cnt], %[cnt], #1 \n"
361 "bne 1b \n"
362#endif
363 "b 99f \n"
364
/* 20: v2 is word aligned; the products pair equal halves of the v1 and v2
   words (smlabb/smlatt). */
365 "20: \n"
366 "1: \n"
367 "ldmia %[v1]!, {r0,r1} \n"
368 "ldmia %[v2]!, {r2,r3} \n"
369#if ORDER > 32
370 "smlabb %[res], r0, r2, %[res] \n"
371#else
372 "smulbb %[res], r0, r2 \n"
373#endif
374 "smlatt %[res], r0, r2, %[res] \n"
375 "smlabb %[res], r1, r3, %[res] \n"
376 "smlatt %[res], r1, r3, %[res] \n"
377
378 REPEAT_MLA(
379 "ldmia %[v1]!, {r0,r1} \n"
380 "ldmia %[v2]!, {r2,r3} \n"
381 "smlabb %[res], r0, r2, %[res] \n"
382 "smlatt %[res], r0, r2, %[res] \n"
383 "smlabb %[res], r1, r3, %[res] \n"
384 "smlatt %[res], r1, r3, %[res] \n"
385 )
386#if ORDER > 32
387 "subs %[cnt], %[cnt], #1 \n"
388 "bne 1b \n"
389#endif
390
391 "99: \n"
392 : /* outputs */
393#if ORDER > 32
394 [cnt]"+r"(cnt),
395#endif
396 [v1] "+r"(v1),
397 [v2] "+r"(v2),
398 [res]"=r"(res)
399 : /* inputs */
400 : /* clobbers */
401 "r0", "r1", "r2", "r3", "cc", "memory"
402 );
403 return res;
404}
diff --git a/lib/rbcodec/codecs/demac/libdemac/vector_math16_armv6.h b/lib/rbcodec/codecs/demac/libdemac/vector_math16_armv6.h
new file mode 100644
index 0000000000..8d27331b62
--- /dev/null
+++ b/lib/rbcodec/codecs/demac/libdemac/vector_math16_armv6.h
@@ -0,0 +1,490 @@
1/*
2
3libdemac - A Monkey's Audio decoder
4
5$Id$
6
7Copyright (C) Dave Chapman 2007
8
9ARMv6 vector math copyright (C) 2008 Jens Arnold
10
11This program is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2 of the License, or
14(at your option) any later version.
15
16This program is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
20
21You should have received a copy of the GNU General Public License
22along with this program; if not, write to the Free Software
23Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
24
25*/
26
/* NOTE(review): FUSED_VECTOR_MATH presumably tells the including code that
 * this header provides the fused vector_sp_add()/vector_sp_sub() -- confirm
 * where the macro is tested. */
27#define FUSED_VECTOR_MATH
28
/* Text-repetition helper used to unroll the inline assembly below: pastes its
 * argument three times when ORDER > 16 and once otherwise. */
29#if ORDER > 16
30#define REPEAT_BLOCK(x) x x x
31#else
32#define REPEAT_BLOCK(x) x
33#endif
34
35/* Calculate scalarproduct, then add a 2nd vector (fused for performance)
36 * This version fetches data as 32 bit words, and *requires* v1 to be
37 * 32 bit aligned. It also requires that f2 and s2 are either both 32 bit
38 * aligned or both unaligned. If either condition isn't met, it will either
39 * result in a data abort or incorrect results. */
/* ARMv6 implementation.
 * Returns the sum of v1[i] * f2[i] and, in the same pass, updates v1 in place
 * with v1[i] += s2[i] (sadd16 on packed halfword pairs). The products use the
 * v1 values from before the update.
 * One pass consists of a leading block, REPEAT_BLOCK copies of a double block
 * and a trailing block, four samples per block: 32 samples when ORDER > 16,
 * otherwise 16. With ORDER > 32 the pass is repeated ORDER>>5 times.
 * v1 is accessed with ldrd/strd. Registers r0-r7 are used as scratch. */
40static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
41{
42 int res;
43#if ORDER > 32
44 int cnt = ORDER>>5;
45#endif
46
47 asm volatile (
/* With ORDER > 32 the accumulator is cleared and every product is
   accumulated; otherwise the first product pair initialises it. */
48#if ORDER > 32
49 "mov %[res], #0 \n"
50#endif
/* Select the code path from bit 1 of the f2 address. */
51 "tst %[f2], #2 \n"
52 "beq 20f \n"
53
/* 10: f2 is not word aligned. One leading halfword of f2 and s2 is consumed
   and kept in the top half of r3 / r6; afterwards pkhtb/pkhbt re-pair the
   halfwords of neighbouring words and the exchanging multiply (smladx /
   smuadx) is used. */
54 "10: \n"
55 "ldrh r3, [%[f2]], #2 \n"
56 "ldrh r6, [%[s2]], #2 \n"
57 "ldmia %[f2]!, {r2,r4} \n"
58 "mov r3, r3, lsl #16 \n"
59 "mov r6, r6, lsl #16 \n"
60
61 "1: \n"
62 "ldmia %[s2]!, {r5,r7} \n"
63 "pkhtb r3, r3, r2 \n"
64 "pkhtb r2, r2, r4 \n"
65 "ldrd r0, [%[v1]] \n"
66 "mov r5, r5, ror #16 \n"
67 "pkhtb r6, r5, r6, asr #16 \n"
68 "pkhbt r5, r5, r7, lsl #16 \n"
69#if ORDER > 32
70 "smladx %[res], r0, r3, %[res] \n"
71#else
72 "smuadx %[res], r0, r3 \n"
73#endif
74 "smladx %[res], r1, r2, %[res] \n"
75 "ldmia %[f2]!, {r2,r3} \n"
76 "sadd16 r0, r0, r6 \n"
77 "sadd16 r1, r1, r5 \n"
78 "strd r0, [%[v1]], #8 \n"
79
80 REPEAT_BLOCK(
81 "ldmia %[s2]!, {r5,r6} \n"
82 "pkhtb r4, r4, r2 \n"
83 "pkhtb r2, r2, r3 \n"
84 "ldrd r0, [%[v1]] \n"
85 "mov r5, r5, ror #16 \n"
86 "pkhtb r7, r5, r7, asr #16 \n"
87 "pkhbt r5, r5, r6, lsl #16 \n"
88 "smladx %[res], r0, r4, %[res] \n"
89 "smladx %[res], r1, r2, %[res] \n"
90 "ldmia %[f2]!, {r2,r4} \n"
91 "sadd16 r0, r0, r7 \n"
92 "sadd16 r1, r1, r5 \n"
93 "strd r0, [%[v1]], #8 \n"
94 "ldmia %[s2]!, {r5,r7} \n"
95 "pkhtb r3, r3, r2 \n"
96 "pkhtb r2, r2, r4 \n"
97 "ldrd r0, [%[v1]] \n"
98 "mov r5, r5, ror #16 \n"
99 "pkhtb r6, r5, r6, asr #16 \n"
100 "pkhbt r5, r5, r7, lsl #16 \n"
101 "smladx %[res], r0, r3, %[res] \n"
102 "smladx %[res], r1, r2, %[res] \n"
103 "ldmia %[f2]!, {r2,r3} \n"
104 "sadd16 r0, r0, r6 \n"
105 "sadd16 r1, r1, r5 \n"
106 "strd r0, [%[v1]], #8 \n"
107 )
108
109 "ldmia %[s2]!, {r5,r6} \n"
110 "pkhtb r4, r4, r2 \n"
111 "pkhtb r2, r2, r3 \n"
112 "ldrd r0, [%[v1]] \n"
113 "mov r5, r5, ror #16 \n"
114 "pkhtb r7, r5, r7, asr #16 \n"
115 "pkhbt r5, r5, r6, lsl #16 \n"
116 "smladx %[res], r0, r4, %[res] \n"
117 "smladx %[res], r1, r2, %[res] \n"
/* Looping variant: the next f2 words are only preloaded (ldmneia) when
   another pass follows. */
118#if ORDER > 32
119 "subs %[cnt], %[cnt], #1 \n"
120 "ldmneia %[f2]!, {r2,r4} \n"
121 "sadd16 r0, r0, r7 \n"
122 "sadd16 r1, r1, r5 \n"
123 "strd r0, [%[v1]], #8 \n"
124 "bne 1b \n"
125#else
126 "sadd16 r0, r0, r7 \n"
127 "sadd16 r1, r1, r5 \n"
128 "strd r0, [%[v1]], #8 \n"
129#endif
130
131 "b 99f \n"
132
/* 20: f2 is word aligned. The halfword pairs of v1, f2 and s2 line up, so the
   non-exchanging multiply (smlad / smuad) is used. r0:r1 and r2:r3 alternate
   as the v1 working pair; the next pair is preloaded from [v1, #8] before the
   current one is stored back. */
133 "20: \n"
134 "ldrd r4, [%[f2]], #8 \n"
135 "ldrd r0, [%[v1]] \n"
136
137#if ORDER > 32
138 "1: \n"
139 "smlad %[res], r0, r4, %[res] \n"
140#else
141 "smuad %[res], r0, r4 \n"
142#endif
143 "ldrd r6, [%[s2]], #8 \n"
144 "smlad %[res], r1, r5, %[res] \n"
145 "ldrd r4, [%[f2]], #8 \n"
146 "ldrd r2, [%[v1], #8] \n"
147 "sadd16 r0, r0, r6 \n"
148 "sadd16 r1, r1, r7 \n"
149 "strd r0, [%[v1]], #8 \n"
150
151 REPEAT_BLOCK(
152 "smlad %[res], r2, r4, %[res] \n"
153 "ldrd r6, [%[s2]], #8 \n"
154 "smlad %[res], r3, r5, %[res] \n"
155 "ldrd r4, [%[f2]], #8 \n"
156 "ldrd r0, [%[v1], #8] \n"
157 "sadd16 r2, r2, r6 \n"
158 "sadd16 r3, r3, r7 \n"
159 "strd r2, [%[v1]], #8 \n"
160 "smlad %[res], r0, r4, %[res] \n"
161 "ldrd r6, [%[s2]], #8 \n"
162 "smlad %[res], r1, r5, %[res] \n"
163 "ldrd r4, [%[f2]], #8 \n"
164 "ldrd r2, [%[v1], #8] \n"
165 "sadd16 r0, r0, r6 \n"
166 "sadd16 r1, r1, r7 \n"
167 "strd r0, [%[v1]], #8 \n"
168 )
169
170 "smlad %[res], r2, r4, %[res] \n"
171 "ldrd r6, [%[s2]], #8 \n"
172 "smlad %[res], r3, r5, %[res] \n"
/* Looping variant: the preloads for the next pass (ldrned) are skipped on
   the last pass. */
173#if ORDER > 32
174 "subs %[cnt], %[cnt], #1 \n"
175 "ldrned r4, [%[f2]], #8 \n"
176 "ldrned r0, [%[v1], #8] \n"
177 "sadd16 r2, r2, r6 \n"
178 "sadd16 r3, r3, r7 \n"
179 "strd r2, [%[v1]], #8 \n"
180 "bne 1b \n"
181#else
182 "sadd16 r2, r2, r6 \n"
183 "sadd16 r3, r3, r7 \n"
184 "strd r2, [%[v1]], #8 \n"
185#endif
186
187 "99: \n"
188 : /* outputs */
189#if ORDER > 32
190 [cnt]"+r"(cnt),
191#endif
192 [v1] "+r"(v1),
193 [f2] "+r"(f2),
194 [s2] "+r"(s2),
195 [res]"=r"(res)
196 : /* inputs */
197 : /* clobbers */
198 "r0", "r1", "r2", "r3", "r4",
199 "r5", "r6", "r7", "cc", "memory"
200 );
201 return res;
202}
203
204/* Calculate scalarproduct, then subtract a 2nd vector (fused for performance)
205 * This version fetches data as 32 bit words, and *requires* v1 to be
206 * 32 bit aligned. It also requires that f2 and s2 are either both 32 bit
207 * aligned or both unaligned. If either condition isn't met, it will either
208 * result in a data abort or incorrect results. */
/* ARMv6 implementation.
 * Returns the sum of v1[i] * f2[i] and, in the same pass, updates v1 in place
 * with v1[i] -= s2[i] (ssub16 on packed halfword pairs). The products use the
 * v1 values from before the update.
 * One pass consists of a leading block, REPEAT_BLOCK copies of a double block
 * and a trailing block, four samples per block: 32 samples when ORDER > 16,
 * otherwise 16. With ORDER > 32 the pass is repeated ORDER>>5 times.
 * v1 is accessed with ldrd/strd. Registers r0-r7 are used as scratch. */
209static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
210{
211 int res;
212#if ORDER > 32
213 int cnt = ORDER>>5;
214#endif
215
216 asm volatile (
/* With ORDER > 32 the accumulator is cleared and every product is
   accumulated; otherwise the first product pair initialises it. */
217#if ORDER > 32
218 "mov %[res], #0 \n"
219#endif
/* Select the code path from bit 1 of the f2 address. */
220 "tst %[f2], #2 \n"
221 "beq 20f \n"
222
/* 10: f2 is not word aligned. One leading halfword of f2 and s2 is consumed
   and kept in the top half of r3 / r6; afterwards pkhtb/pkhbt re-pair the
   halfwords of neighbouring words and the exchanging multiply (smladx /
   smuadx) is used. */
223 "10: \n"
224 "ldrh r3, [%[f2]], #2 \n"
225 "ldrh r6, [%[s2]], #2 \n"
226 "ldmia %[f2]!, {r2,r4} \n"
227 "mov r3, r3, lsl #16 \n"
228 "mov r6, r6, lsl #16 \n"
229
230 "1: \n"
231 "ldmia %[s2]!, {r5,r7} \n"
232 "pkhtb r3, r3, r2 \n"
233 "pkhtb r2, r2, r4 \n"
234 "ldrd r0, [%[v1]] \n"
235 "mov r5, r5, ror #16 \n"
236 "pkhtb r6, r5, r6, asr #16 \n"
237 "pkhbt r5, r5, r7, lsl #16 \n"
238#if ORDER > 32
239 "smladx %[res], r0, r3, %[res] \n"
240#else
241 "smuadx %[res], r0, r3 \n"
242#endif
243 "smladx %[res], r1, r2, %[res] \n"
244 "ldmia %[f2]!, {r2,r3} \n"
245 "ssub16 r0, r0, r6 \n"
246 "ssub16 r1, r1, r5 \n"
247 "strd r0, [%[v1]], #8 \n"
248
249 REPEAT_BLOCK(
250 "ldmia %[s2]!, {r5,r6} \n"
251 "pkhtb r4, r4, r2 \n"
252 "pkhtb r2, r2, r3 \n"
253 "ldrd r0, [%[v1]] \n"
254 "mov r5, r5, ror #16 \n"
255 "pkhtb r7, r5, r7, asr #16 \n"
256 "pkhbt r5, r5, r6, lsl #16 \n"
257 "smladx %[res], r0, r4, %[res] \n"
258 "smladx %[res], r1, r2, %[res] \n"
259 "ldmia %[f2]!, {r2,r4} \n"
260 "ssub16 r0, r0, r7 \n"
261 "ssub16 r1, r1, r5 \n"
262 "strd r0, [%[v1]], #8 \n"
263 "ldmia %[s2]!, {r5,r7} \n"
264 "pkhtb r3, r3, r2 \n"
265 "pkhtb r2, r2, r4 \n"
266 "ldrd r0, [%[v1]] \n"
267 "mov r5, r5, ror #16 \n"
268 "pkhtb r6, r5, r6, asr #16 \n"
269 "pkhbt r5, r5, r7, lsl #16 \n"
270 "smladx %[res], r0, r3, %[res] \n"
271 "smladx %[res], r1, r2, %[res] \n"
272 "ldmia %[f2]!, {r2,r3} \n"
273 "ssub16 r0, r0, r6 \n"
274 "ssub16 r1, r1, r5 \n"
275 "strd r0, [%[v1]], #8 \n"
276 )
277
278 "ldmia %[s2]!, {r5,r6} \n"
279 "pkhtb r4, r4, r2 \n"
280 "pkhtb r2, r2, r3 \n"
281 "ldrd r0, [%[v1]] \n"
282 "mov r5, r5, ror #16 \n"
283 "pkhtb r7, r5, r7, asr #16 \n"
284 "pkhbt r5, r5, r6, lsl #16 \n"
285 "smladx %[res], r0, r4, %[res] \n"
286 "smladx %[res], r1, r2, %[res] \n"
/* Looping variant: the next f2 words are only preloaded (ldmneia) when
   another pass follows. */
287#if ORDER > 32
288 "subs %[cnt], %[cnt], #1 \n"
289 "ldmneia %[f2]!, {r2,r4} \n"
290 "ssub16 r0, r0, r7 \n"
291 "ssub16 r1, r1, r5 \n"
292 "strd r0, [%[v1]], #8 \n"
293 "bne 1b \n"
294#else
295 "ssub16 r0, r0, r7 \n"
296 "ssub16 r1, r1, r5 \n"
297 "strd r0, [%[v1]], #8 \n"
298#endif
299
300 "b 99f \n"
301
/* 20: f2 is word aligned. The halfword pairs of v1, f2 and s2 line up, so the
   non-exchanging multiply (smlad / smuad) is used. r0:r1 and r2:r3 alternate
   as the v1 working pair; the next pair is preloaded from [v1, #8] before the
   current one is stored back. */
302 "20: \n"
303 "ldrd r4, [%[f2]], #8 \n"
304 "ldrd r0, [%[v1]] \n"
305
306#if ORDER > 32
307 "1: \n"
308 "smlad %[res], r0, r4, %[res] \n"
309#else
310 "smuad %[res], r0, r4 \n"
311#endif
312 "ldrd r6, [%[s2]], #8 \n"
313 "smlad %[res], r1, r5, %[res] \n"
314 "ldrd r4, [%[f2]], #8 \n"
315 "ldrd r2, [%[v1], #8] \n"
316 "ssub16 r0, r0, r6 \n"
317 "ssub16 r1, r1, r7 \n"
318 "strd r0, [%[v1]], #8 \n"
319
320 REPEAT_BLOCK(
321 "smlad %[res], r2, r4, %[res] \n"
322 "ldrd r6, [%[s2]], #8 \n"
323 "smlad %[res], r3, r5, %[res] \n"
324 "ldrd r4, [%[f2]], #8 \n"
325 "ldrd r0, [%[v1], #8] \n"
326 "ssub16 r2, r2, r6 \n"
327 "ssub16 r3, r3, r7 \n"
328 "strd r2, [%[v1]], #8 \n"
329 "smlad %[res], r0, r4, %[res] \n"
330 "ldrd r6, [%[s2]], #8 \n"
331 "smlad %[res], r1, r5, %[res] \n"
332 "ldrd r4, [%[f2]], #8 \n"
333 "ldrd r2, [%[v1], #8] \n"
334 "ssub16 r0, r0, r6 \n"
335 "ssub16 r1, r1, r7 \n"
336 "strd r0, [%[v1]], #8 \n"
337 )
338
339 "smlad %[res], r2, r4, %[res] \n"
340 "ldrd r6, [%[s2]], #8 \n"
341 "smlad %[res], r3, r5, %[res] \n"
/* Looping variant: the preloads for the next pass (ldrned) are skipped on
   the last pass. */
342#if ORDER > 32
343 "subs %[cnt], %[cnt], #1 \n"
344 "ldrned r4, [%[f2]], #8 \n"
345 "ldrned r0, [%[v1], #8] \n"
346 "ssub16 r2, r2, r6 \n"
347 "ssub16 r3, r3, r7 \n"
348 "strd r2, [%[v1]], #8 \n"
349 "bne 1b \n"
350#else
351 "ssub16 r2, r2, r6 \n"
352 "ssub16 r3, r3, r7 \n"
353 "strd r2, [%[v1]], #8 \n"
354#endif
355
356 "99: \n"
357 : /* outputs */
358#if ORDER > 32
359 [cnt]"+r"(cnt),
360#endif
361 [v1] "+r"(v1),
362 [f2] "+r"(f2),
363 [s2] "+r"(s2),
364 [res]"=r"(res)
365 : /* inputs */
366 : /* clobbers */
367 "r0", "r1", "r2", "r3", "r4",
368 "r5", "r6", "r7", "cc", "memory"
369 );
370 return res;
371}
372
373/* This version fetches data as 32 bit words, and *requires* v1 to be
374 * 32 bit aligned, otherwise it will result either in a data abort, or
375 * incorrect results (if ARM aligncheck is disabled). */
/* ARMv6 implementation.
 * Returns the sum of v1[i] * v2[i]; neither vector is written.
 * Each smlad/smladx multiplies two halfword pairs. One pass issues 16 of them
 * (32 samples) when ORDER > 16 and 8 (16 samples) otherwise. With ORDER > 32
 * the pass is repeated ORDER>>5 times.
 * v1 is read with ldrd. Registers r0-r7 are used as scratch. */
376static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
377{
378 int res;
379#if ORDER > 32
380 int cnt = ORDER>>5;
381#endif
382
383 asm volatile (
/* With ORDER > 32 the accumulator is cleared and every product is
   accumulated; otherwise the first product pair initialises it. */
384#if ORDER > 32
385 "mov %[res], #0 \n"
386#endif
/* Select the code path from bit 1 of the v2 address. */
387 "tst %[v2], #2 \n"
388 "beq 20f \n"
389
/* 10: v2 is not word aligned. v2 is rounded down to the enclosing word
   boundary (bic) and read as whole words; pkhtb then takes halfwords from two
   neighbouring words to form each v2 pair, which is multiplied with the
   exchanging smladx / smuadx. */
390 "10: \n"
391 "bic %[v2], %[v2], #2 \n"
392 "ldmia %[v2]!, {r5-r7} \n"
393 "ldrd r0, [%[v1]], #8 \n"
394
395 "1: \n"
396 "pkhtb r3, r5, r6 \n"
397 "ldrd r4, [%[v2]], #8 \n"
398#if ORDER > 32
399 "smladx %[res], r0, r3, %[res] \n"
400#else
401 "smuadx %[res], r0, r3 \n"
402#endif
403 REPEAT_BLOCK(
404 "pkhtb r0, r6, r7 \n"
405 "ldrd r2, [%[v1]], #8 \n"
406 "smladx %[res], r1, r0, %[res] \n"
407 "pkhtb r1, r7, r4 \n"
408 "ldrd r6, [%[v2]], #8 \n"
409 "smladx %[res], r2, r1, %[res] \n"
410 "pkhtb r2, r4, r5 \n"
411 "ldrd r0, [%[v1]], #8 \n"
412 "smladx %[res], r3, r2, %[res] \n"
413 "pkhtb r3, r5, r6 \n"
414 "ldrd r4, [%[v2]], #8 \n"
415 "smladx %[res], r0, r3, %[res] \n"
416 )
417
418 "pkhtb r0, r6, r7 \n"
419 "ldrd r2, [%[v1]], #8 \n"
420 "smladx %[res], r1, r0, %[res] \n"
421 "pkhtb r1, r7, r4 \n"
/* Looping variant: the loads for the next pass (ldrned) are skipped on the
   last pass. */
422#if ORDER > 32
423 "subs %[cnt], %[cnt], #1 \n"
424 "ldrned r6, [%[v2]], #8 \n"
425 "smladx %[res], r2, r1, %[res] \n"
426 "pkhtb r2, r4, r5 \n"
427 "ldrned r0, [%[v1]], #8 \n"
428 "smladx %[res], r3, r2, %[res] \n"
429 "bne 1b \n"
430#else
431 "pkhtb r4, r4, r5 \n"
432 "smladx %[res], r2, r1, %[res] \n"
433 "smladx %[res], r3, r4, %[res] \n"
434#endif
435
436 "b 99f \n"
437
/* 20: v2 is word aligned; the halfword pairs of v1 and v2 line up and the
   non-exchanging smlad / smuad is used. Loads are issued ahead of the
   multiplies that consume them. */
438 "20: \n"
439 "ldrd r0, [%[v1]], #8 \n"
440 "ldmia %[v2]!, {r5-r7} \n"
441
442 "1: \n"
443 "ldrd r2, [%[v1]], #8 \n"
444#if ORDER > 32
445 "smlad %[res], r0, r5, %[res] \n"
446#else
447 "smuad %[res], r0, r5 \n"
448#endif
449 REPEAT_BLOCK(
450 "ldrd r4, [%[v2]], #8 \n"
451 "smlad %[res], r1, r6, %[res] \n"
452 "ldrd r0, [%[v1]], #8 \n"
453 "smlad %[res], r2, r7, %[res] \n"
454 "ldrd r6, [%[v2]], #8 \n"
455 "smlad %[res], r3, r4, %[res] \n"
456 "ldrd r2, [%[v1]], #8 \n"
457 "smlad %[res], r0, r5, %[res] \n"
458 )
459
/* Looping variant: the loads for the next pass (ldrned) are skipped on the
   last pass. The non-looping variant only needs one more v2 word, hence the
   plain ldr. */
460#if ORDER > 32
461 "ldrd r4, [%[v2]], #8 \n"
462 "smlad %[res], r1, r6, %[res] \n"
463 "subs %[cnt], %[cnt], #1 \n"
464 "ldrned r0, [%[v1]], #8 \n"
465 "smlad %[res], r2, r7, %[res] \n"
466 "ldrned r6, [%[v2]], #8 \n"
467 "smlad %[res], r3, r4, %[res] \n"
468 "bne 1b \n"
469#else
470 "ldr r4, [%[v2]], #4 \n"
471 "smlad %[res], r1, r6, %[res] \n"
472 "smlad %[res], r2, r7, %[res] \n"
473 "smlad %[res], r3, r4, %[res] \n"
474#endif
475
476 "99: \n"
477 : /* outputs */
478#if ORDER > 32
479 [cnt]"+r"(cnt),
480#endif
481 [v1] "+r"(v1),
482 [v2] "+r"(v2),
483 [res]"=r"(res)
484 : /* inputs */
485 : /* clobbers */
486 "r0", "r1", "r2", "r3",
487 "r4", "r5", "r6", "r7", "cc", "memory"
488 );
489 return res;
490}
diff --git a/lib/rbcodec/codecs/demac/libdemac/vector_math16_armv7.h b/lib/rbcodec/codecs/demac/libdemac/vector_math16_armv7.h
new file mode 100644
index 0000000000..84afda3e5d
--- /dev/null
+++ b/lib/rbcodec/codecs/demac/libdemac/vector_math16_armv7.h
@@ -0,0 +1,214 @@
1/*
2
3libdemac - A Monkey's Audio decoder
4
5$Id$
6
7Copyright (C) Dave Chapman 2007
8
9ARMv7 neon vector math copyright (C) 2010 Jens Arnold
10
11This program is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2 of the License, or
14(at your option) any later version.
15
16This program is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
20
21You should have received a copy of the GNU General Public License
22along with this program; if not, write to the Free Software
23Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
24
25*/
26
/* NOTE(review): presumably tells the including filter code that the fused
 * vector_sp_add()/vector_sp_sub() routines in this header are available -
 * confirm against the file that includes this header. */
27#define FUSED_VECTOR_MATH
28
/* REPEAT_BLOCK(x) pastes the asm text x three times (ORDER > 32), once
 * (ORDER > 16) or not at all. The functions in this header emit one explicit
 * 16-element block followed by REPEAT_BLOCK(block), i.e. 4, 2 or 1 blocks. */
29#if ORDER > 32
30#define REPEAT_BLOCK(x) x x x
31#elif ORDER > 16
32#define REPEAT_BLOCK(x) x
33#else
34#define REPEAT_BLOCK(x)
35#endif
36
37/* Calculate scalarproduct, then add a 2nd vector (fused for performance) */
38static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
39{
40 int res;
41#if ORDER > 64
42 int cnt = ORDER>>6;
43#endif
44
45 asm volatile (
46#if ORDER > 64
47 "vmov.i16 q0, #0 \n"
48 "1: \n"
49 "subs %[cnt], %[cnt], #1 \n"
50#endif
51 "vld1.16 {d6-d9}, [%[f2]]! \n"
52 "vld1.16 {d2-d5}, [%[v1]] \n"
53 "vld1.16 {d10-d13}, [%[s2]]! \n"
54#if ORDER > 64
55 "vmlal.s16 q0, d2, d6 \n"
56#else
57 "vmull.s16 q0, d2, d6 \n"
58#endif
59 "vmlal.s16 q0, d3, d7 \n"
60 "vmlal.s16 q0, d4, d8 \n"
61 "vmlal.s16 q0, d5, d9 \n"
62 "vadd.i16 q1, q1, q5 \n"
63 "vadd.i16 q2, q2, q6 \n"
64 "vst1.16 {d2-d5}, [%[v1]]! \n"
65
66 REPEAT_BLOCK(
67 "vld1.16 {d6-d9}, [%[f2]]! \n"
68 "vld1.16 {d2-d5}, [%[v1]] \n"
69 "vld1.16 {d10-d13}, [%[s2]]! \n"
70 "vmlal.s16 q0, d2, d6 \n"
71 "vmlal.s16 q0, d3, d7 \n"
72 "vmlal.s16 q0, d4, d8 \n"
73 "vmlal.s16 q0, d5, d9 \n"
74 "vadd.i16 q1, q1, q5 \n"
75 "vadd.i16 q2, q2, q6 \n"
76 "vst1.16 {d2-d5}, [%[v1]]! \n"
77 )
78#if ORDER > 64
79 "bne 1b \n"
80#endif
81 "vpadd.i32 d0, d0, d1 \n"
82 "vpaddl.s32 d0, d0 \n"
83 "vmov.32 %[res], d0[0] \n"
84 : /* outputs */
85#if ORDER > 64
86 [cnt]"+r"(cnt),
87#endif
88 [v1] "+r"(v1),
89 [f2] "+r"(f2),
90 [s2] "+r"(s2),
91 [res]"=r"(res)
92 : /* inputs */
93 : /* clobbers */
94 "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
95 "d8", "d9", "d10", "d11", "d12", "d13", "memory"
96 );
97 return res;
98}
99
100/* Calculate scalarproduct, then subtract a 2nd vector (fused for performance) */
101static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
102{
103 int res;
104#if ORDER > 64
105 int cnt = ORDER>>6;
106#endif
107
108 asm volatile (
109#if ORDER > 64
110 "vmov.i16 q0, #0 \n"
111 "1: \n"
112 "subs %[cnt], %[cnt], #1 \n"
113#endif
114 "vld1.16 {d6-d9}, [%[f2]]! \n"
115 "vld1.16 {d2-d5}, [%[v1]] \n"
116 "vld1.16 {d10-d13}, [%[s2]]! \n"
117#if ORDER > 64
118 "vmlal.s16 q0, d2, d6 \n"
119#else
120 "vmull.s16 q0, d2, d6 \n"
121#endif
122 "vmlal.s16 q0, d3, d7 \n"
123 "vmlal.s16 q0, d4, d8 \n"
124 "vmlal.s16 q0, d5, d9 \n"
125 "vsub.i16 q1, q1, q5 \n"
126 "vsub.i16 q2, q2, q6 \n"
127 "vst1.16 {d2-d5}, [%[v1]]! \n"
128
129 REPEAT_BLOCK(
130 "vld1.16 {d6-d9}, [%[f2]]! \n"
131 "vld1.16 {d2-d5}, [%[v1]] \n"
132 "vld1.16 {d10-d13}, [%[s2]]! \n"
133 "vmlal.s16 q0, d2, d6 \n"
134 "vmlal.s16 q0, d3, d7 \n"
135 "vmlal.s16 q0, d4, d8 \n"
136 "vmlal.s16 q0, d5, d9 \n"
137 "vsub.i16 q1, q1, q5 \n"
138 "vsub.i16 q2, q2, q6 \n"
139 "vst1.16 {d2-d5}, [%[v1]]! \n"
140 )
141#if ORDER > 64
142 "bne 1b \n"
143#endif
144 "vpadd.i32 d0, d0, d1 \n"
145 "vpaddl.s32 d0, d0 \n"
146 "vmov.32 %[res], d0[0] \n"
147 : /* outputs */
148#if ORDER > 64
149 [cnt]"+r"(cnt),
150#endif
151 [v1] "+r"(v1),
152 [f2] "+r"(f2),
153 [s2] "+r"(s2),
154 [res]"=r"(res)
155 : /* inputs */
156 : /* clobbers */
157 "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
158 "d8", "d9", "d10", "d11", "d12", "d13", "memory"
159 );
160 return res;
161}
162
163static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
164{
165 int res;
166#if ORDER > 64
167 int cnt = ORDER>>6;
168#endif
169
170 asm volatile (
171#if ORDER > 64
172 "vmov.i16 q0, #0 \n"
173 "1: \n"
174 "subs %[cnt], %[cnt], #1 \n"
175#endif
176 "vld1.16 {d2-d5}, [%[v1]]! \n"
177 "vld1.16 {d6-d9}, [%[v2]]! \n"
178#if ORDER > 64
179 "vmlal.s16 q0, d2, d6 \n"
180#else
181 "vmull.s16 q0, d2, d6 \n"
182#endif
183 "vmlal.s16 q0, d3, d7 \n"
184 "vmlal.s16 q0, d4, d8 \n"
185 "vmlal.s16 q0, d5, d9 \n"
186
187 REPEAT_BLOCK(
188 "vld1.16 {d2-d5}, [%[v1]]! \n"
189 "vld1.16 {d6-d9}, [%[v2]]! \n"
190 "vmlal.s16 q0, d2, d6 \n"
191 "vmlal.s16 q0, d3, d7 \n"
192 "vmlal.s16 q0, d4, d8 \n"
193 "vmlal.s16 q0, d5, d9 \n"
194 )
195#if ORDER > 64
196 "bne 1b \n"
197#endif
198 "vpadd.i32 d0, d0, d1 \n"
199 "vpaddl.s32 d0, d0 \n"
200 "vmov.32 %[res], d0[0] \n"
201 : /* outputs */
202#if ORDER > 64
203 [cnt]"+r"(cnt),
204#endif
205 [v1] "+r"(v1),
206 [v2] "+r"(v2),
207 [res]"=r"(res)
208 : /* inputs */
209 : /* clobbers */
210 "d0", "d1", "d2", "d3", "d4",
211 "d5", "d6", "d7", "d8", "d9"
212 );
213 return res;
214}
diff --git a/lib/rbcodec/codecs/demac/libdemac/vector_math16_cf.h b/lib/rbcodec/codecs/demac/libdemac/vector_math16_cf.h
new file mode 100644
index 0000000000..4d77d3be31
--- /dev/null
+++ b/lib/rbcodec/codecs/demac/libdemac/vector_math16_cf.h
@@ -0,0 +1,364 @@
1/*
2
3libdemac - A Monkey's Audio decoder
4
5$Id$
6
7Copyright (C) Dave Chapman 2007
8
9Coldfire vector math copyright (C) 2007 Jens Arnold
10
11This program is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2 of the License, or
14(at your option) any later version.
15
16This program is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
20
21You should have received a copy of the GNU General Public License
22along with this program; if not, write to the Free Software
23Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
24
25*/
26
/* NOTE(review): presumably tells the including filter code that the fused
 * vector_sp_add()/vector_sp_sub() routines in this header are available -
 * confirm against the file that includes this header. */
27#define FUSED_VECTOR_MATH
28
/* Statement that puts the EMAC into signed integer mode (MACSR = 0), which
 * the routines in this header require. coldfire_set_macsr() is defined
 * outside this header. */
29#define PREPARE_SCALARPRODUCT coldfire_set_macsr(0); /* signed integer mode */
30
/* Paste the asm text x 2, 3 or 7 times (manual unrolling). */
31#define REPEAT_2(x) x x
32#define REPEAT_3(x) x x x
33#define REPEAT_7(x) x x x x x x x
34
35/* Calculate scalarproduct, then add a 2nd vector (fused for performance)
36 * This version fetches data as 32 bit words, and *recommends* v1 to be
37 * 32 bit aligned. It also assumes that f2 and s2 are either both 32 bit
38 * aligned or both unaligned. Performance will suffer if either condition
39 * isn't met. It also needs EMAC in signed integer mode. */
/* Parameters / result as visible in the code below:
 *   v1 - vector that is multiplied with f2 and then updated in place
 *        (v1[i] += s2[i], per 16-bit half of each 32-bit word).
 *   f2 - vector multiplied with v1; read only.
 *   s2 - vector added to v1; read only.
 * Returns the contents of EMAC accumulator acc0 after all mac.w
 * instructions; acc0 is read and cleared with movclr.l at the end, it is
 * not cleared on entry. 16 elements are handled per loop pass, and the loop
 * runs ORDER>>4 times when ORDER > 16. */
40static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
41{
42 int res;
43#if ORDER > 16
44 int cnt = ORDER>>4;
45#endif
46
47#define ADDHALFREGS(s1, s2, sum) /* Add register halves straight. */ \
48 "move.l " #s1 ", " #sum "\n" /* 's1' and 's2' can be A or D */ \
49 "add.l " #s2 ", " #s1 "\n" /* regs, 'sum' must be a D reg. */ \
50 "clr.w " #sum " \n" /* 's1' is clobbered! */ \
51 "add.l " #s2 ", " #sum "\n" \
52 "move.w " #s1 ", " #sum "\n"
53
54#define ADDHALFXREGS(s1, s2, sum) /* Add register halves across. */ \
55 "clr.w " #sum " \n" /* Needs 'sum' pre-swapped, swaps */ \
56 "add.l " #s1 ", " #sum "\n" /* 's1' is clobbered. */ \
57 "swap " #s2 " \n" /* 's1' can be an A or D reg. */ \
58 "add.l " #s2 ", " #s1 "\n" /* 'sum' and 's2' must be D regs. */ \
59 "move.w " #s1 ", " #sum "\n"
60
61 asm volatile (
 /* Test bit 1 of the f2 address: if it is clear (f2 is 32 bit aligned)
  * branch to the path at label 20, otherwise fall through to label 10. */
62 "move.l %[f2], %%d0 \n"
63 "and.l #2, %%d0 \n"
64 "jeq 20f \n"
65
 /* Path for f2/s2 on an odd 16 bit boundary: fetch one leading 16 bit
  * word from each, so the following fetches from f2 and s2 are longs.
  * The s2 word is swapped into the upper half of d1. */
66 "10: \n"
67 "move.w (%[f2])+, %%d0 \n"
68 "move.w (%[s2])+, %%d1 \n"
69 "swap %%d1 \n"
70 "1: \n"
 /* Each repetition handles 8 elements (4 longs of v1); every mac.w also
  * loads the next long from f2 or s2 in parallel. */
71 REPEAT_2(
72 "movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1 \n"
73 "mac.w %%d0l, %%d6u, (%[f2])+, %%d0, %%acc0\n"
74 "mac.w %%d0u, %%d6l, (%[s2])+, %%d2, %%acc0\n"
75 ADDHALFXREGS(%%d6, %%d2, %%d1)
76 "mac.w %%d0l, %%d7u, (%[f2])+, %%d0, %%acc0\n"
77 "mac.w %%d0u, %%d7l, (%[s2])+, %%d6, %%acc0\n"
78 "move.l %%d1, (%[v1])+ \n"
79 ADDHALFXREGS(%%d7, %%d6, %%d2)
80 "mac.w %%d0l, %%a0u, (%[f2])+, %%d0, %%acc0\n"
81 "mac.w %%d0u, %%a0l, (%[s2])+, %%d7, %%acc0\n"
82 "move.l %%d2, (%[v1])+ \n"
83 ADDHALFXREGS(%%a0, %%d7, %%d6)
84 "mac.w %%d0l, %%a1u, (%[f2])+, %%d0, %%acc0\n"
85 "mac.w %%d0u, %%a1l, (%[s2])+, %%d1, %%acc0\n"
86 "move.l %%d6, (%[v1])+ \n"
87 ADDHALFXREGS(%%a1, %%d1, %%d7)
88 "move.l %%d7, (%[v1])+ \n"
89 )
90
 /* %[res] doubles as the loop counter: the "[res]" matching constraint
  * on cnt places cnt in the same register, and the real result is only
  * written to it at label 99. */
91#if ORDER > 16
92 "subq.l #1, %[res] \n"
93 "bne.w 1b \n"
94#endif
95 "jra 99f \n"
96
 /* Path for 32 bit aligned f2: two unrolled groups of 8 elements per
  * loop pass, using the straight (non-crossing) half-register add. */
97 "20: \n"
98 "move.l (%[f2])+, %%d0 \n"
99 "1: \n"
100 "movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1 \n"
101 "mac.w %%d0u, %%d6u, (%[s2])+, %%d1, %%acc0\n"
102 "mac.w %%d0l, %%d6l, (%[f2])+, %%d0, %%acc0\n"
103 ADDHALFREGS(%%d6, %%d1, %%d2)
104 "mac.w %%d0u, %%d7u, (%[s2])+, %%d1, %%acc0\n"
105 "mac.w %%d0l, %%d7l, (%[f2])+, %%d0, %%acc0\n"
106 "move.l %%d2, (%[v1])+ \n"
107 ADDHALFREGS(%%d7, %%d1, %%d2)
108 "mac.w %%d0u, %%a0u, (%[s2])+, %%d1, %%acc0\n"
109 "mac.w %%d0l, %%a0l, (%[f2])+, %%d0, %%acc0\n"
110 "move.l %%d2, (%[v1])+ \n"
111 ADDHALFREGS(%%a0, %%d1, %%d2)
112 "mac.w %%d0u, %%a1u, (%[s2])+, %%d1, %%acc0\n"
113 "mac.w %%d0l, %%a1l, (%[f2])+, %%d0, %%acc0\n"
114 "move.l %%d2, (%[v1])+ \n"
115 ADDHALFREGS(%%a1, %%d1, %%d2)
116 "move.l %%d2, (%[v1])+ \n"
117
118 "movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1 \n"
119 "mac.w %%d0u, %%d6u, (%[s2])+, %%d1, %%acc0\n"
120 "mac.w %%d0l, %%d6l, (%[f2])+, %%d0, %%acc0\n"
121 ADDHALFREGS(%%d6, %%d1, %%d2)
122 "mac.w %%d0u, %%d7u, (%[s2])+, %%d1, %%acc0\n"
123 "mac.w %%d0l, %%d7l, (%[f2])+, %%d0, %%acc0\n"
124 "move.l %%d2, (%[v1])+ \n"
125 ADDHALFREGS(%%d7, %%d1, %%d2)
126 "mac.w %%d0u, %%a0u, (%[s2])+, %%d1, %%acc0\n"
127 "mac.w %%d0l, %%a0l, (%[f2])+, %%d0, %%acc0\n"
128 "move.l %%d2, (%[v1])+ \n"
129 ADDHALFREGS(%%a0, %%d1, %%d2)
130 "mac.w %%d0u, %%a1u, (%[s2])+, %%d1, %%acc0\n"
 /* Without a loop there is no following group, so the last mac.w of the
  * aligned path does not fetch another long from f2. */
131#if ORDER > 16
132 "mac.w %%d0l, %%a1l, (%[f2])+, %%d0, %%acc0\n"
133#else
134 "mac.w %%d0l, %%a1l, %%acc0 \n"
135#endif
136 "move.l %%d2, (%[v1])+ \n"
137 ADDHALFREGS(%%a1, %%d1, %%d2)
138 "move.l %%d2, (%[v1])+ \n"
139#if ORDER > 16
140 "subq.l #1, %[res] \n"
141 "bne.w 1b \n"
142#endif
143
 /* Common exit: move the accumulator into res and clear it. */
144 "99: \n"
145 "movclr.l %%acc0, %[res] \n"
146 : /* outputs */
147 [v1]"+a"(v1),
148 [f2]"+a"(f2),
149 [s2]"+a"(s2),
150 [res]"=d"(res)
151 : /* inputs */
152#if ORDER > 16
153 [cnt]"[res]"(cnt)
154#endif
155 : /* clobbers */
156 "d0", "d1", "d2", "d6", "d7",
157 "a0", "a1", "memory"
158
159 );
160 return res;
161}
162
163/* Calculate scalarproduct, then subtract a 2nd vector (fused for performance)
164 * This version fetches data as 32 bit words, and *recommends* v1 to be
165 * 32 bit aligned. It also assumes that f2 and s2 are either both 32 bit
166 * aligned or both unaligned. Performance will suffer if either condition
167 * isn't met. It also needs EMAC in signed integer mode. */
/* Parameters / result as visible in the code below:
 *   v1 - vector that is multiplied with f2 and then updated in place
 *        (v1[i] -= s2[i], per 16-bit half of each 32-bit word).
 *   f2 - vector multiplied with v1; read only.
 *   s2 - vector subtracted from v1; read only.
 * Returns the contents of EMAC accumulator acc0 after all mac.w
 * instructions; acc0 is read and cleared with movclr.l at the end, it is
 * not cleared on entry. 16 elements are handled per loop pass, and the loop
 * runs ORDER>>4 times when ORDER > 16. */
168static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
169{
170 int res;
171#if ORDER > 16
172 int cnt = ORDER>>4;
173#endif
174
175#define SUBHALFREGS(min, sub, dif) /* Subtract register halves straight. */ \
176 "move.l " #min ", " #dif "\n" /* 'min' can be an A or D reg */ \
177 "sub.l " #sub ", " #min "\n" /* 'sub' and 'dif' must be D regs */ \
178 "clr.w " #sub "\n" /* 'min' and 'sub' are clobbered! */ \
179 "sub.l " #sub ", " #dif "\n" \
180 "move.w " #min ", " #dif "\n"
181
182#define SUBHALFXREGS(min, s2, s1d) /* Subtract register halves across. */ \
183 "clr.w " #s1d "\n" /* Needs 's1d' pre-swapped, swaps */ \
184 "sub.l " #s1d ", " #min "\n" /* 's2' and clobbers 'min'. */ \
185 "move.l " #min ", " #s1d "\n" /* 'min' can be an A or D reg, */ \
186 "swap " #s2 "\n" /* 's2' and 's1d' must be D regs. */ \
187 "sub.l " #s2 ", " #min "\n" \
188 "move.w " #min ", " #s1d "\n"
189
190 asm volatile (
 /* Test bit 1 of the f2 address: if it is clear (f2 is 32 bit aligned)
  * branch to the path at label 20, otherwise fall through to label 10. */
191 "move.l %[f2], %%d0 \n"
192 "and.l #2, %%d0 \n"
193 "jeq 20f \n"
194
 /* Path for f2/s2 on an odd 16 bit boundary: fetch one leading 16 bit
  * word from each, so the following fetches from f2 and s2 are longs.
  * The s2 word is swapped into the upper half of d1. */
195 "10: \n"
196 "move.w (%[f2])+, %%d0 \n"
197 "move.w (%[s2])+, %%d1 \n"
198 "swap %%d1 \n"
199 "1: \n"
 /* Each repetition handles 8 elements (4 longs of v1); every mac.w also
  * loads the next long from f2 or s2 in parallel. */
200 REPEAT_2(
201 "movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1 \n"
202 "mac.w %%d0l, %%d6u, (%[f2])+, %%d0, %%acc0\n"
203 "mac.w %%d0u, %%d6l, (%[s2])+, %%d2, %%acc0\n"
204 SUBHALFXREGS(%%d6, %%d2, %%d1)
205 "mac.w %%d0l, %%d7u, (%[f2])+, %%d0, %%acc0\n"
206 "mac.w %%d0u, %%d7l, (%[s2])+, %%d6, %%acc0\n"
207 "move.l %%d1, (%[v1])+ \n"
208 SUBHALFXREGS(%%d7, %%d6, %%d2)
209 "mac.w %%d0l, %%a0u, (%[f2])+, %%d0, %%acc0\n"
210 "mac.w %%d0u, %%a0l, (%[s2])+, %%d7, %%acc0\n"
211 "move.l %%d2, (%[v1])+ \n"
212 SUBHALFXREGS(%%a0, %%d7, %%d6)
213 "mac.w %%d0l, %%a1u, (%[f2])+, %%d0, %%acc0\n"
214 "mac.w %%d0u, %%a1l, (%[s2])+, %%d1, %%acc0\n"
215 "move.l %%d6, (%[v1])+ \n"
216 SUBHALFXREGS(%%a1, %%d1, %%d7)
217 "move.l %%d7, (%[v1])+ \n"
218 )
219
 /* %[res] doubles as the loop counter: the "[res]" matching constraint
  * on cnt places cnt in the same register, and the real result is only
  * written to it at label 99. */
220#if ORDER > 16
221 "subq.l #1, %[res] \n"
222 "bne.w 1b \n"
223#endif
224
225 "jra 99f \n"
226
 /* Path for 32 bit aligned f2: two unrolled groups of 8 elements per
  * loop pass, using the straight (non-crossing) half-register subtract. */
227 "20: \n"
228 "move.l (%[f2])+, %%d0 \n"
229 "1: \n"
230 "movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1 \n"
231 "mac.w %%d0u, %%d6u, (%[s2])+, %%d1, %%acc0\n"
232 "mac.w %%d0l, %%d6l, (%[f2])+, %%d0, %%acc0\n"
233 SUBHALFREGS(%%d6, %%d1, %%d2)
234 "mac.w %%d0u, %%d7u, (%[s2])+, %%d1, %%acc0\n"
235 "mac.w %%d0l, %%d7l, (%[f2])+, %%d0, %%acc0\n"
236 "move.l %%d2, (%[v1])+ \n"
237 SUBHALFREGS(%%d7, %%d1, %%d2)
238 "mac.w %%d0u, %%a0u, (%[s2])+, %%d1, %%acc0\n"
239 "mac.w %%d0l, %%a0l, (%[f2])+, %%d0, %%acc0\n"
240 "move.l %%d2, (%[v1])+ \n"
241 SUBHALFREGS(%%a0, %%d1, %%d2)
242 "mac.w %%d0u, %%a1u, (%[s2])+, %%d1, %%acc0\n"
243 "mac.w %%d0l, %%a1l, (%[f2])+, %%d0, %%acc0\n"
244 "move.l %%d2, (%[v1])+ \n"
245 SUBHALFREGS(%%a1, %%d1, %%d2)
246 "move.l %%d2, (%[v1])+ \n"
247
248 "movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1 \n"
249 "mac.w %%d0u, %%d6u, (%[s2])+, %%d1, %%acc0\n"
250 "mac.w %%d0l, %%d6l, (%[f2])+, %%d0, %%acc0\n"
251 SUBHALFREGS(%%d6, %%d1, %%d2)
252 "mac.w %%d0u, %%d7u, (%[s2])+, %%d1, %%acc0\n"
253 "mac.w %%d0l, %%d7l, (%[f2])+, %%d0, %%acc0\n"
254 "move.l %%d2, (%[v1])+ \n"
255 SUBHALFREGS(%%d7, %%d1, %%d2)
256 "mac.w %%d0u, %%a0u, (%[s2])+, %%d1, %%acc0\n"
257 "mac.w %%d0l, %%a0l, (%[f2])+, %%d0, %%acc0\n"
258 "move.l %%d2, (%[v1])+ \n"
259 SUBHALFREGS(%%a0, %%d1, %%d2)
260 "mac.w %%d0u, %%a1u, (%[s2])+, %%d1, %%acc0\n"
 /* Without a loop there is no following group, so the last mac.w of the
  * aligned path does not fetch another long from f2. */
261#if ORDER > 16
262 "mac.w %%d0l, %%a1l, (%[f2])+, %%d0, %%acc0\n"
263#else
264 "mac.w %%d0l, %%a1l, %%acc0 \n"
265#endif
266 "move.l %%d2, (%[v1])+ \n"
267 SUBHALFREGS(%%a1, %%d1, %%d2)
268 "move.l %%d2, (%[v1])+ \n"
269#if ORDER > 16
270 "subq.l #1, %[res] \n"
271 "bne.w 1b \n"
272#endif
273
 /* Common exit: move the accumulator into res and clear it. */
274 "99: \n"
275 "movclr.l %%acc0, %[res] \n"
276 : /* outputs */
277 [v1]"+a"(v1),
278 [f2]"+a"(f2),
279 [s2]"+a"(s2),
280 [res]"=d"(res)
281 : /* inputs */
282#if ORDER > 16
283 [cnt]"[res]"(cnt)
284#endif
285 : /* clobbers */
286 "d0", "d1", "d2", "d6", "d7",
287 "a0", "a1", "memory"
288
289 );
290 return res;
291}
292
293/* This version fetches data as 32 bit words, and *recommends* v1 to be
294 * 32 bit aligned, otherwise performance will suffer. It also needs EMAC
295 * in signed integer mode. */
/* Calculates the scalarproduct of v1 and v2 without modifying either
 * vector. Returns the contents of EMAC accumulator acc0 after all mac.w
 * instructions; acc0 is read and cleared with movclr.l at the end, it is
 * not cleared on entry. 16 elements are handled per loop pass, and the loop
 * runs ORDER>>4 times when ORDER > 16. */
296static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
297{
298 int res;
299#if ORDER > 16
300 int cnt = ORDER>>4;
301#endif
302
303 asm volatile (
 /* Test bit 1 of the v2 address: if it is clear (v2 is 32 bit aligned)
  * branch to the path at label 20, otherwise fall through to label 10. */
304 "move.l %[v2], %%d0 \n"
305 "and.l #2, %%d0 \n"
306 "jeq 20f \n"
307
 /* Path for v2 on an odd 16 bit boundary: one leading 16 bit word is
  * fetched from v2, after which v2 is fetched as longs. The products
  * therefore pair opposite register halves (d0u*d1l, d0l*d1u). */
308 "10: \n"
309 "move.l (%[v1])+, %%d0 \n"
310 "move.w (%[v2])+, %%d1 \n"
311 "1: \n"
312 REPEAT_7(
313 "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"
314 "mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n"
315 )
316
317 "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"
 /* %[res] doubles as the loop counter (cnt is tied to it through the
  * "[res]" matching constraint); without a loop the last mac.w does not
  * fetch another long from v1. */
318#if ORDER > 16
319 "mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n"
320 "subq.l #1, %[res] \n"
321 "bne.b 1b \n"
322#else
323 "mac.w %%d0l, %%d1u, %%acc0 \n"
324#endif
325 "jra 99f \n"
326
 /* Path for 32 bit aligned v2: both vectors are fetched as longs and
  * matching register halves are multiplied. v1 data alternates between
  * d0 and d2. */
327 "20: \n"
328 "move.l (%[v1])+, %%d0 \n"
329 "move.l (%[v2])+, %%d1 \n"
330 "1: \n"
331 REPEAT_3(
332 "mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n"
333 "mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
334 "mac.w %%d2u, %%d1u, (%[v1])+, %%d0, %%acc0\n"
335 "mac.w %%d2l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
336 )
337
338 "mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n"
339 "mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
340#if ORDER > 16
341 "mac.w %%d2u, %%d1u, (%[v1])+, %%d0, %%acc0\n"
342 "mac.w %%d2l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
343 "subq.l #1, %[res] \n"
344 "bne.b 1b \n"
345#else
346 "mac.w %%d2u, %%d1u, %%acc0 \n"
347 "mac.w %%d2l, %%d1l, %%acc0 \n"
348#endif
349
 /* Common exit: move the accumulator into res and clear it. */
350 "99: \n"
351 "movclr.l %%acc0, %[res] \n"
352 : /* outputs */
353 [v1]"+a"(v1),
354 [v2]"+a"(v2),
355 [res]"=d"(res)
356 : /* inputs */
357#if ORDER > 16
358 [cnt]"[res]"(cnt)
359#endif
360 : /* clobbers */
361 "d0", "d1", "d2"
362 );
363 return res;
364}
diff --git a/lib/rbcodec/codecs/demac/libdemac/vector_math16_mmx.h b/lib/rbcodec/codecs/demac/libdemac/vector_math16_mmx.h
new file mode 100644
index 0000000000..2177fe88ea
--- /dev/null
+++ b/lib/rbcodec/codecs/demac/libdemac/vector_math16_mmx.h
@@ -0,0 +1,234 @@
1/*
2
3libdemac - A Monkey's Audio decoder
4
5$Id$
6
7Copyright (C) Dave Chapman 2007
8
9MMX vector math copyright (C) 2010 Jens Arnold
10
11This program is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2 of the License, or
14(at your option) any later version.
15
16This program is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
20
21You should have received a copy of the GNU General Public License
22along with this program; if not, write to the Free Software
23Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
24
25*/
26
/* NOTE(review): presumably tells the including filter code that the fused
 * vector_sp_add()/vector_sp_sub() routines in this header are available -
 * confirm against the file that includes this header. */
27#define FUSED_VECTOR_MATH
28
/* REPEAT_MBk(x, n) invokes the block macro x k times with byte offsets
 * n, n+8, n+16, ... (one 8-byte MMX quadword = four 16-bit elements each). */
29#define REPEAT_MB3(x, n) x(n) x(n+8) x(n+16)
30#define REPEAT_MB7(x, n) x(n) x(n+8) x(n+16) x(n+24) x(n+32) x(n+40) x(n+48)
31#define REPEAT_MB8(x, n) REPEAT_MB7(x, n) x(n+56)
32
/* REPEAT_MB(x) invokes x for every remaining quadword of one pass.
 * For ORDER <= 256 the offsets start at 8, because the functions in this
 * header handle offset 0 in an explicit first block. For ORDER == 1280 the
 * offsets run from 0 to 504, i.e. 512 bytes per pass of the functions' loop.
 * Any other ORDER is rejected at compile time. */
33#if ORDER == 16 /* 3 times */
34#define REPEAT_MB(x) REPEAT_MB3(x, 8)
35#elif ORDER == 32 /* 7 times */
36#define REPEAT_MB(x) REPEAT_MB7(x, 8)
37#elif ORDER == 64 /* 5*3 == 15 times */
38#define REPEAT_MB(x) REPEAT_MB3(x, 8) REPEAT_MB3(x, 32) REPEAT_MB3(x, 56) \
39 REPEAT_MB3(x, 80) REPEAT_MB3(x, 104)
40#elif ORDER == 256 /* 9*7 == 63 times */
41#define REPEAT_MB(x) REPEAT_MB7(x, 8) REPEAT_MB7(x, 64) REPEAT_MB7(x, 120) \
42 REPEAT_MB7(x, 176) REPEAT_MB7(x, 232) REPEAT_MB7(x, 288) \
43 REPEAT_MB7(x, 344) REPEAT_MB7(x, 400) REPEAT_MB7(x, 456)
44#elif ORDER == 1280 /* 8*8 == 64 times */
45#define REPEAT_MB(x) REPEAT_MB8(x, 0) REPEAT_MB8(x, 64) REPEAT_MB8(x, 128) \
46 REPEAT_MB8(x, 192) REPEAT_MB8(x, 256) REPEAT_MB8(x, 320) \
47 REPEAT_MB8(x, 384) REPEAT_MB8(x, 448)
48#else
49#error unsupported order
50#endif
51
52
53static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t *s2)
54{
55 int res, t;
56#if ORDER > 256
57 int cnt = ORDER>>8;
58#endif
59
60 asm volatile (
61#if ORDER > 256
62 "pxor %%mm2, %%mm2 \n"
63 "1: \n"
64#else
65 "movq (%[v1]), %%mm2 \n"
66 "movq %%mm2, %%mm0 \n"
67 "pmaddwd (%[f2]), %%mm2 \n"
68 "paddw (%[s2]), %%mm0 \n"
69 "movq %%mm0, (%[v1]) \n"
70#endif
71
72#define SP_ADD_BLOCK(n) \
73 "movq " #n "(%[v1]), %%mm1 \n" \
74 "movq %%mm1, %%mm0 \n" \
75 "pmaddwd " #n "(%[f2]), %%mm1 \n" \
76 "paddw " #n "(%[s2]), %%mm0 \n" \
77 "movq %%mm0, " #n "(%[v1]) \n" \
78 "paddd %%mm1, %%mm2 \n"
79
80REPEAT_MB(SP_ADD_BLOCK)
81
82#if ORDER > 256
83 "add $512, %[v1] \n"
84 "add $512, %[s2] \n"
85 "add $512, %[f2] \n"
86 "dec %[cnt] \n"
87 "jne 1b \n"
88#endif
89
90 "movd %%mm2, %[t] \n"
91 "psrlq $32, %%mm2 \n"
92 "movd %%mm2, %[res] \n"
93 "add %[t], %[res] \n"
94 : /* outputs */
95#if ORDER > 256
96 [cnt]"+r"(cnt),
97 [s2] "+r"(s2),
98 [res]"=r"(res),
99 [t] "=r"(t)
100 : /* inputs */
101 [v1]"2"(v1),
102 [f2]"3"(f2)
103#else
104 [res]"=r"(res),
105 [t] "=r"(t)
106 : /* inputs */
107 [v1]"r"(v1),
108 [f2]"r"(f2),
109 [s2]"r"(s2)
110#endif
111 : /* clobbers */
112 "mm0", "mm1", "mm2"
113 );
114 return res;
115}
116
117static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t *s2)
118{
119 int res, t;
120#if ORDER > 256
121 int cnt = ORDER>>8;
122#endif
123
124 asm volatile (
125#if ORDER > 256
126 "pxor %%mm2, %%mm2 \n"
127 "1: \n"
128#else
129 "movq (%[v1]), %%mm2 \n"
130 "movq %%mm2, %%mm0 \n"
131 "pmaddwd (%[f2]), %%mm2 \n"
132 "psubw (%[s2]), %%mm0 \n"
133 "movq %%mm0, (%[v1]) \n"
134#endif
135
136#define SP_SUB_BLOCK(n) \
137 "movq " #n "(%[v1]), %%mm1 \n" \
138 "movq %%mm1, %%mm0 \n" \
139 "pmaddwd " #n "(%[f2]), %%mm1 \n" \
140 "psubw " #n "(%[s2]), %%mm0 \n" \
141 "movq %%mm0, " #n "(%[v1]) \n" \
142 "paddd %%mm1, %%mm2 \n"
143
144REPEAT_MB(SP_SUB_BLOCK)
145
146#if ORDER > 256
147 "add $512, %[v1] \n"
148 "add $512, %[s2] \n"
149 "add $512, %[f2] \n"
150 "dec %[cnt] \n"
151 "jne 1b \n"
152#endif
153
154 "movd %%mm2, %[t] \n"
155 "psrlq $32, %%mm2 \n"
156 "movd %%mm2, %[res] \n"
157 "add %[t], %[res] \n"
158 : /* outputs */
159#if ORDER > 256
160 [cnt]"+r"(cnt),
161 [s2] "+r"(s2),
162 [res]"=r"(res),
163 [t] "=r"(t)
164 : /* inputs */
165 [v1]"2"(v1),
166 [f2]"3"(f2)
167#else
168 [res]"=r"(res),
169 [t] "=r"(t)
170 : /* inputs */
171 [v1]"r"(v1),
172 [f2]"r"(f2),
173 [s2]"r"(s2)
174#endif
175 : /* clobbers */
176 "mm0", "mm1", "mm2"
177 );
178 return res;
179}
180
181static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
182{
183 int res, t;
184#if ORDER > 256
185 int cnt = ORDER>>8;
186#endif
187
188 asm volatile (
189#if ORDER > 256
190 "pxor %%mm1, %%mm1 \n"
191 "1: \n"
192#else
193 "movq (%[v1]), %%mm1 \n"
194 "pmaddwd (%[v2]), %%mm1 \n"
195#endif
196
197#define SP_BLOCK(n) \
198 "movq " #n "(%[v1]), %%mm0 \n" \
199 "pmaddwd " #n "(%[v2]), %%mm0 \n" \
200 "paddd %%mm0, %%mm1 \n"
201
202REPEAT_MB(SP_BLOCK)
203
204#if ORDER > 256
205 "add $512, %[v1] \n"
206 "add $512, %[v2] \n"
207 "dec %[cnt] \n"
208 "jne 1b \n"
209#endif
210
211 "movd %%mm1, %[t] \n"
212 "psrlq $32, %%mm1 \n"
213 "movd %%mm1, %[res] \n"
214 "add %[t], %[res] \n"
215 : /* outputs */
216#if ORDER > 256
217 [cnt]"+r"(cnt),
218 [res]"=r"(res),
219 [t] "=r"(t)
220 : /* inputs */
221 [v1]"1"(v1),
222 [v2]"2"(v2)
223#else
224 [res]"=r"(res),
225 [t] "=r"(t)
226 : /* inputs */
227 [v1]"r"(v1),
228 [v2]"r"(v2)
229#endif
230 : /* clobbers */
231 "mm0", "mm1"
232 );
233 return res;
234}
diff --git a/lib/rbcodec/codecs/demac/libdemac/vector_math32_armv4.h b/lib/rbcodec/codecs/demac/libdemac/vector_math32_armv4.h
new file mode 100644
index 0000000000..d6bb9b0d9c
--- /dev/null
+++ b/lib/rbcodec/codecs/demac/libdemac/vector_math32_armv4.h
@@ -0,0 +1,201 @@
1/*
2
3libdemac - A Monkey's Audio decoder
4
5$Id$
6
7Copyright (C) Dave Chapman 2007
8
9ARMv4 vector math copyright (C) 2008 Jens Arnold
10
11This program is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2 of the License, or
14(at your option) any later version.
15
16This program is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
20
21You should have received a copy of the GNU General Public License
22along with this program; if not, write to the Free Software
23Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
24
25*/
26
/* NOTE(review): presumably tells the including filter code that the fused
 * vector_sp_add()/vector_sp_sub() routines in this header are available -
 * confirm against the file that includes this header. */
27#define FUSED_VECTOR_MATH
28
/* REPEAT_BLOCK(x) pastes the asm text x 8, 7 or 3 times. One block handles
 * four 32-bit elements. For ORDER > 32 the eight blocks form the body of a
 * loop (32 elements per pass); otherwise the functions in this header emit
 * one explicit first block plus 7 (ORDER > 16) or 3 repeated blocks. */
29#if ORDER > 32
30#define REPEAT_BLOCK(x) x x x x x x x x
31#elif ORDER > 16
32#define REPEAT_BLOCK(x) x x x x x x x
33#else
34#define REPEAT_BLOCK(x) x x x
35#endif
36
37/* Calculate scalarproduct, then add a 2nd vector (fused for performance) */
/* Works on ORDER 32-bit elements, four per ldmia/stmia block:
 *   - returns the sum of v1[i] * f2[i], accumulated with mul/mla in a single
 *     32-bit register; each block multiplies before it stores the updated
 *     v1 values, so the products use the values v1 held on entry;
 *   - v1[i] is then replaced by v1[i] + s2[i].
 * The pointers are advanced by the asm, but only in the local copies of
 * the arguments. */
38static inline int32_t vector_sp_add(int32_t* v1, int32_t* f2, int32_t* s2)
39{
40 int res;
41#if ORDER > 32
42 int cnt = ORDER>>5;
43#endif
44
45 asm volatile (
 /* With a loop the accumulator is cleared up front; without one the first
  * block uses mul to initialise it. */
46#if ORDER > 32
47 "mov %[res], #0 \n"
48 "1: \n"
49#else
50 "ldmia %[v1], {r0-r3} \n"
51 "ldmia %[f2]!, {r4-r7} \n"
52 "mul %[res], r4, r0 \n"
53 "mla %[res], r5, r1, %[res] \n"
54 "mla %[res], r6, r2, %[res] \n"
55 "mla %[res], r7, r3, %[res] \n"
56 "ldmia %[s2]!, {r4-r7} \n"
57 "add r0, r0, r4 \n"
58 "add r1, r1, r5 \n"
59 "add r2, r2, r6 \n"
60 "add r3, r3, r7 \n"
61 "stmia %[v1]!, {r0-r3} \n"
62#endif
 /* Per block: v1 -> r0-r3 (no writeback yet), f2 -> r4-r7, accumulate,
  * then reuse r4-r7 for s2, add and store back to v1 with writeback. */
63 REPEAT_BLOCK(
64 "ldmia %[v1], {r0-r3} \n"
65 "ldmia %[f2]!, {r4-r7} \n"
66 "mla %[res], r4, r0, %[res] \n"
67 "mla %[res], r5, r1, %[res] \n"
68 "mla %[res], r6, r2, %[res] \n"
69 "mla %[res], r7, r3, %[res] \n"
70 "ldmia %[s2]!, {r4-r7} \n"
71 "add r0, r0, r4 \n"
72 "add r1, r1, r5 \n"
73 "add r2, r2, r6 \n"
74 "add r3, r3, r7 \n"
75 "stmia %[v1]!, {r0-r3} \n"
76 )
77#if ORDER > 32
78 "subs %[cnt], %[cnt], #1 \n"
79 "bne 1b \n"
80#endif
81 : /* outputs */
82#if ORDER > 32
83 [cnt]"+r"(cnt),
84#endif
85 [v1] "+r"(v1),
86 [f2] "+r"(f2),
87 [s2] "+r"(s2),
88 [res]"=r"(res)
89 : /* inputs */
90 : /* clobbers */
91 "r0", "r1", "r2", "r3", "r4",
92 "r5", "r6", "r7", "cc", "memory"
93 );
94 return res;
95}
96
97/* Calculate scalarproduct, then subtract a 2nd vector (fused for performance) */
/* Works on ORDER 32-bit elements, four per ldmia/stmia block:
 *   - returns the sum of v1[i] * f2[i], accumulated with mul/mla in a single
 *     32-bit register; each block multiplies before it stores the updated
 *     v1 values, so the products use the values v1 held on entry;
 *   - v1[i] is then replaced by v1[i] - s2[i].
 * The pointers are advanced by the asm, but only in the local copies of
 * the arguments. */
98static inline int32_t vector_sp_sub(int32_t* v1, int32_t* f2, int32_t* s2)
99{
100 int res;
101#if ORDER > 32
102 int cnt = ORDER>>5;
103#endif
104
105 asm volatile (
 /* With a loop the accumulator is cleared up front; without one the first
  * block uses mul to initialise it. */
106#if ORDER > 32
107 "mov %[res], #0 \n"
108 "1: \n"
109#else
110 "ldmia %[v1], {r0-r3} \n"
111 "ldmia %[f2]!, {r4-r7} \n"
112 "mul %[res], r4, r0 \n"
113 "mla %[res], r5, r1, %[res] \n"
114 "mla %[res], r6, r2, %[res] \n"
115 "mla %[res], r7, r3, %[res] \n"
116 "ldmia %[s2]!, {r4-r7} \n"
117 "sub r0, r0, r4 \n"
118 "sub r1, r1, r5 \n"
119 "sub r2, r2, r6 \n"
120 "sub r3, r3, r7 \n"
121 "stmia %[v1]!, {r0-r3} \n"
122#endif
 /* Per block: v1 -> r0-r3 (no writeback yet), f2 -> r4-r7, accumulate,
  * then reuse r4-r7 for s2, subtract and store back to v1 with writeback. */
123 REPEAT_BLOCK(
124 "ldmia %[v1], {r0-r3} \n"
125 "ldmia %[f2]!, {r4-r7} \n"
126 "mla %[res], r4, r0, %[res] \n"
127 "mla %[res], r5, r1, %[res] \n"
128 "mla %[res], r6, r2, %[res] \n"
129 "mla %[res], r7, r3, %[res] \n"
130 "ldmia %[s2]!, {r4-r7} \n"
131 "sub r0, r0, r4 \n"
132 "sub r1, r1, r5 \n"
133 "sub r2, r2, r6 \n"
134 "sub r3, r3, r7 \n"
135 "stmia %[v1]!, {r0-r3} \n"
136 )
137#if ORDER > 32
138 "subs %[cnt], %[cnt], #1 \n"
139 "bne 1b \n"
140#endif
141 : /* outputs */
142#if ORDER > 32
143 [cnt]"+r"(cnt),
144#endif
145 [v1] "+r"(v1),
146 [f2] "+r"(f2),
147 [s2] "+r"(s2),
148 [res]"=r"(res)
149 : /* inputs */
150 : /* clobbers */
151 "r0", "r1", "r2", "r3", "r4",
152 "r5", "r6", "r7", "cc", "memory"
153 );
154 return res;
155}
156
/* Calculate the scalarproduct of two vectors of ORDER 32-bit elements.
 * Returns the sum of v1[i] * v2[i], accumulated with mul/mla in a single
 * 32-bit register, four elements per ldmia block. Neither vector is
 * modified; the pointers are advanced only in the local copies of the
 * arguments. */
157static inline int32_t scalarproduct(int32_t* v1, int32_t* v2)
158{
159 int res;
160#if ORDER > 32
161 int cnt = ORDER>>5;
162#endif
163
164 asm volatile (
 /* With a loop the accumulator is cleared up front; without one the first
  * block uses mul to initialise it. */
165#if ORDER > 32
166 "mov %[res], #0 \n"
167 "1: \n"
168#else
169 "ldmia %[v1]!, {r0-r3} \n"
170 "ldmia %[v2]!, {r4-r7} \n"
171 "mul %[res], r4, r0 \n"
172 "mla %[res], r5, r1, %[res] \n"
173 "mla %[res], r6, r2, %[res] \n"
174 "mla %[res], r7, r3, %[res] \n"
175#endif
176 REPEAT_BLOCK(
177 "ldmia %[v1]!, {r0-r3} \n"
178 "ldmia %[v2]!, {r4-r7} \n"
179 "mla %[res], r4, r0, %[res] \n"
180 "mla %[res], r5, r1, %[res] \n"
181 "mla %[res], r6, r2, %[res] \n"
182 "mla %[res], r7, r3, %[res] \n"
183 )
184#if ORDER > 32
185 "subs %[cnt], %[cnt], #1 \n"
186 "bne 1b \n"
187#endif
188 : /* outputs */
189#if ORDER > 32
190 [cnt]"+r"(cnt),
191#endif
192 [v1] "+r"(v1),
193 [v2] "+r"(v2),
194 [res]"=r"(res)
195 : /* inputs */
196 : /* clobbers */
197 "r0", "r1", "r2", "r3",
198 "r4", "r5", "r6", "r7", "cc", "memory"
199 );
200 return res;
201}
diff --git a/lib/rbcodec/codecs/demac/libdemac/vector_math_generic.h b/lib/rbcodec/codecs/demac/libdemac/vector_math_generic.h
new file mode 100644
index 0000000000..00bf07a007
--- /dev/null
+++ b/lib/rbcodec/codecs/demac/libdemac/vector_math_generic.h
@@ -0,0 +1,160 @@
1/*
2
3libdemac - A Monkey's Audio decoder
4
5$Id$
6
7Copyright (C) Dave Chapman 2007
8
9This program is free software; you can redistribute it and/or modify
10it under the terms of the GNU General Public License as published by
11the Free Software Foundation; either version 2 of the License, or
12(at your option) any later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
22
23*/
24
25#include "demac_config.h"
26
27static inline void vector_add(filter_int* v1, filter_int* v2)
28{
29#if ORDER > 32
30 int order = (ORDER >> 5);
31 while (order--)
32#endif
33 {
34 *v1++ += *v2++;
35 *v1++ += *v2++;
36 *v1++ += *v2++;
37 *v1++ += *v2++;
38 *v1++ += *v2++;
39 *v1++ += *v2++;
40 *v1++ += *v2++;
41 *v1++ += *v2++;
42 *v1++ += *v2++;
43 *v1++ += *v2++;
44 *v1++ += *v2++;
45 *v1++ += *v2++;
46 *v1++ += *v2++;
47 *v1++ += *v2++;
48 *v1++ += *v2++;
49 *v1++ += *v2++;
50#if ORDER > 16
51 *v1++ += *v2++;
52 *v1++ += *v2++;
53 *v1++ += *v2++;
54 *v1++ += *v2++;
55 *v1++ += *v2++;
56 *v1++ += *v2++;
57 *v1++ += *v2++;
58 *v1++ += *v2++;
59 *v1++ += *v2++;
60 *v1++ += *v2++;
61 *v1++ += *v2++;
62 *v1++ += *v2++;
63 *v1++ += *v2++;
64 *v1++ += *v2++;
65 *v1++ += *v2++;
66 *v1++ += *v2++;
67#endif
68 }
69}
70
71static inline void vector_sub(filter_int* v1, filter_int* v2)
72{
73#if ORDER > 32
74 int order = (ORDER >> 5);
75 while (order--)
76#endif
77 {
78 *v1++ -= *v2++;
79 *v1++ -= *v2++;
80 *v1++ -= *v2++;
81 *v1++ -= *v2++;
82 *v1++ -= *v2++;
83 *v1++ -= *v2++;
84 *v1++ -= *v2++;
85 *v1++ -= *v2++;
86 *v1++ -= *v2++;
87 *v1++ -= *v2++;
88 *v1++ -= *v2++;
89 *v1++ -= *v2++;
90 *v1++ -= *v2++;
91 *v1++ -= *v2++;
92 *v1++ -= *v2++;
93 *v1++ -= *v2++;
94#if ORDER > 16
95 *v1++ -= *v2++;
96 *v1++ -= *v2++;
97 *v1++ -= *v2++;
98 *v1++ -= *v2++;
99 *v1++ -= *v2++;
100 *v1++ -= *v2++;
101 *v1++ -= *v2++;
102 *v1++ -= *v2++;
103 *v1++ -= *v2++;
104 *v1++ -= *v2++;
105 *v1++ -= *v2++;
106 *v1++ -= *v2++;
107 *v1++ -= *v2++;
108 *v1++ -= *v2++;
109 *v1++ -= *v2++;
110 *v1++ -= *v2++;
111#endif
112 }
113}
114
115static inline int32_t scalarproduct(filter_int* v1, filter_int* v2)
116{
117 int res = 0;
118
119#if ORDER > 32
120 int order = (ORDER >> 5);
121 while (order--)
122#endif
123 {
124 res += *v1++ * *v2++;
125 res += *v1++ * *v2++;
126 res += *v1++ * *v2++;
127 res += *v1++ * *v2++;
128 res += *v1++ * *v2++;
129 res += *v1++ * *v2++;
130 res += *v1++ * *v2++;
131 res += *v1++ * *v2++;
132 res += *v1++ * *v2++;
133 res += *v1++ * *v2++;
134 res += *v1++ * *v2++;
135 res += *v1++ * *v2++;
136 res += *v1++ * *v2++;
137 res += *v1++ * *v2++;
138 res += *v1++ * *v2++;
139 res += *v1++ * *v2++;
140#if ORDER > 16
141 res += *v1++ * *v2++;
142 res += *v1++ * *v2++;
143 res += *v1++ * *v2++;
144 res += *v1++ * *v2++;
145 res += *v1++ * *v2++;
146 res += *v1++ * *v2++;
147 res += *v1++ * *v2++;
148 res += *v1++ * *v2++;
149 res += *v1++ * *v2++;
150 res += *v1++ * *v2++;
151 res += *v1++ * *v2++;
152 res += *v1++ * *v2++;
153 res += *v1++ * *v2++;
154 res += *v1++ * *v2++;
155 res += *v1++ * *v2++;
156 res += *v1++ * *v2++;
157#endif
158 }
159 return res;
160}