diff options
author | Mohamed Tarek <mt@rockbox.org> | 2010-08-07 13:08:38 +0000 |
---|---|---|
committer | Mohamed Tarek <mt@rockbox.org> | 2010-08-07 13:08:38 +0000 |
commit | eb369699c6b7258df92169d0e1721aa5b2c108d8 (patch) | |
tree | 259aed54fd6029da0990d33a255dcc5d458a313d /apps/codecs/libwmavoice | |
parent | fb26f52697f1bb215375b4acaa626ff36d8d4208 (diff) | |
download | rockbox-eb369699c6b7258df92169d0e1721aa5b2c108d8.tar.gz rockbox-eb369699c6b7258df92169d0e1721aa5b2c108d8.zip |
Remove dsputil.[ch], they are not needed.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27743 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs/libwmavoice')
-rw-r--r-- | apps/codecs/libwmavoice/Makefile | 1 | ||||
-rw-r--r-- | apps/codecs/libwmavoice/dsputil.c | 4535 | ||||
-rw-r--r-- | apps/codecs/libwmavoice/dsputil.h | 800 |
3 files changed, 0 insertions, 5336 deletions
diff --git a/apps/codecs/libwmavoice/Makefile b/apps/codecs/libwmavoice/Makefile index f1d987f40c..2bd7b94f9b 100644 --- a/apps/codecs/libwmavoice/Makefile +++ b/apps/codecs/libwmavoice/Makefile | |||
@@ -31,7 +31,6 @@ get_bits.h\ | |||
31 | wmavoice_data.h\ | 31 | wmavoice_data.h\ |
32 | avcodec.h\ | 32 | avcodec.h\ |
33 | fft.h\ | 33 | fft.h\ |
34 | dsputil.h\ | ||
35 | acelp_filters.h\ | 34 | acelp_filters.h\ |
36 | celp_filters.h\ | 35 | celp_filters.h\ |
37 | put_bits.h\ | 36 | put_bits.h\ |
diff --git a/apps/codecs/libwmavoice/dsputil.c b/apps/codecs/libwmavoice/dsputil.c deleted file mode 100644 index 534f03f885..0000000000 --- a/apps/codecs/libwmavoice/dsputil.c +++ /dev/null | |||
@@ -1,4535 +0,0 @@ | |||
1 | /* | ||
2 | * DSP utils | ||
3 | * Copyright (c) 2000, 2001 Fabrice Bellard | ||
4 | * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | ||
5 | * | ||
6 | * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> | ||
7 | * | ||
8 | * This file is part of FFmpeg. | ||
9 | * | ||
10 | * FFmpeg is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU Lesser General Public | ||
12 | * License as published by the Free Software Foundation; either | ||
13 | * version 2.1 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * FFmpeg is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * Lesser General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU Lesser General Public | ||
21 | * License along with FFmpeg; if not, write to the Free Software | ||
22 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
23 | */ | ||
24 | |||
25 | /** | ||
26 | * @file | ||
27 | * DSP utils | ||
28 | */ | ||
29 | |||
30 | #include "avcodec.h" | ||
31 | #include "dsputil.h" | ||
32 | #include "simple_idct.h" | ||
33 | #include "faandct.h" | ||
34 | #include "faanidct.h" | ||
35 | #include "mathops.h" | ||
36 | #include "mpegvideo.h" | ||
37 | #include "config.h" | ||
38 | #include "lpc.h" | ||
39 | #include "ac3dec.h" | ||
40 | #include "vorbis.h" | ||
41 | #include "png.h" | ||
42 | #include "vp8dsp.h" | ||
43 | |||
/* Clipping lookup table: indexed with an offset of MAX_NEG_CROP so that
 * out-of-range values clamp to [0,255]. Zero-initialized here; presumably
 * populated by the DSP init code elsewhere — TODO confirm. */
uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
/* Square lookup table; used via `sq = ff_squareTbl + 256` so signed 8-bit
 * differences can index it directly. Filled in at init — TODO confirm. */
uint32_t ff_squareTbl[512] = {0, };

// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
50 | |||
/* Classic zig-zag scan order: maps scan position -> raster index of an
 * 8x8 block (low frequencies first). */
const uint8_t ff_zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
61 | |||
/* Specific zigzag scan for 248 idct. NOTE that unlike the
   specification, we interleave the fields, i.e. even and odd rows
   alternate in this scan order. */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
74 | |||
/* not permutated inverse zigzag_direct + 1 for MMX quantizer;
 * contents are computed at runtime elsewhere — TODO confirm. */
DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64];

/* Alternate (horizontal-priority) scan order, scan position -> raster index. */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};

/* Alternate (vertical-priority) scan order, scan position -> raster index. */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
99 | |||
/* Input permutation for the simple_idct_mmx; values are coefficient
 * positions encoded for that IDCT's internal layout. */
static const uint8_t simple_mmx_permutation[64]={
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

/* Row permutation used by the SSE2 IDCT — presumably matches its
 * interleaved register layout; verify against the SSE2 idct code. */
static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
113 | |||
114 | void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){ | ||
115 | int i; | ||
116 | int end; | ||
117 | |||
118 | st->scantable= src_scantable; | ||
119 | |||
120 | for(i=0; i<64; i++){ | ||
121 | int j; | ||
122 | j = src_scantable[i]; | ||
123 | st->permutated[i] = permutation[j]; | ||
124 | #if ARCH_PPC | ||
125 | st->inverse[j] = i; | ||
126 | #endif | ||
127 | } | ||
128 | |||
129 | end=-1; | ||
130 | for(i=0; i<64; i++){ | ||
131 | int j; | ||
132 | j = st->permutated[i]; | ||
133 | if(j>end) end=j; | ||
134 | st->raster_end[i]= end; | ||
135 | } | ||
136 | } | ||
137 | |||
/* Sum of all 256 samples of a 16x16 block; line_size is the stride in
 * bytes between rows. */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += pix[col];
        pix += line_size;
    }
    return total;
}
159 | |||
/* Sum over a 16x16 block of sq[sample] — with sq pointing into
 * ff_squareTbl this presumably yields the sum of squared samples
 * (the table is filled at init; TODO confirm). line_size is the
 * byte stride between rows. */
static int pix_norm1_c(uint8_t * pix, int line_size)
{
    int s, i, j;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
#if 0
            /* Reference byte-at-a-time version (disabled). */
            s += sq[pix[0]];
            s += sq[pix[1]];
            s += sq[pix[2]];
            s += sq[pix[3]];
            s += sq[pix[4]];
            s += sq[pix[5]];
            s += sq[pix[6]];
            s += sq[pix[7]];
#else
#if LONG_MAX > 2147483647
            /* 64-bit hosts: load 8 bytes at once and extract lanes.
             * NOTE(review): type-punning uint8_t* to uint64_t* is UB by
             * strict-aliasing/alignment rules; relies on compiler behavior. */
            register uint64_t x=*(uint64_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            s += sq[(x>>32)&0xff];
            s += sq[(x>>40)&0xff];
            s += sq[(x>>48)&0xff];
            s += sq[(x>>56)&0xff];
#else
            /* 32-bit hosts: two 4-byte loads per 8-pixel group.
             * Same aliasing/alignment caveat as above. */
            register uint32_t x=*(uint32_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            x=*(uint32_t*)(pix+4);
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
#endif
#endif
            pix += 8;
        }
        pix += line_size - 16;
    }
    return s;
}
207 | |||
/* Byte-swap w 32-bit words from src into dst (dst may alias src). */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int n = 0;

    /* Main loop: eight words per iteration to cut loop overhead. */
    while (n + 8 <= w) {
        int k;
        for (k = 0; k < 8; k++)
            dst[n + k] = av_bswap32(src[n + k]);
        n += 8;
    }
    /* Remaining tail words. */
    while (n < w) {
        dst[n] = av_bswap32(src[n]);
        n++;
    }
}
225 | |||
226 | static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) | ||
227 | { | ||
228 | int s, i; | ||
229 | uint32_t *sq = ff_squareTbl + 256; | ||
230 | |||
231 | s = 0; | ||
232 | for (i = 0; i < h; i++) { | ||
233 | s += sq[pix1[0] - pix2[0]]; | ||
234 | s += sq[pix1[1] - pix2[1]]; | ||
235 | s += sq[pix1[2] - pix2[2]]; | ||
236 | s += sq[pix1[3] - pix2[3]]; | ||
237 | pix1 += line_size; | ||
238 | pix2 += line_size; | ||
239 | } | ||
240 | return s; | ||
241 | } | ||
242 | |||
243 | static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) | ||
244 | { | ||
245 | int s, i; | ||
246 | uint32_t *sq = ff_squareTbl + 256; | ||
247 | |||
248 | s = 0; | ||
249 | for (i = 0; i < h; i++) { | ||
250 | s += sq[pix1[0] - pix2[0]]; | ||
251 | s += sq[pix1[1] - pix2[1]]; | ||
252 | s += sq[pix1[2] - pix2[2]]; | ||
253 | s += sq[pix1[3] - pix2[3]]; | ||
254 | s += sq[pix1[4] - pix2[4]]; | ||
255 | s += sq[pix1[5] - pix2[5]]; | ||
256 | s += sq[pix1[6] - pix2[6]]; | ||
257 | s += sq[pix1[7] - pix2[7]]; | ||
258 | pix1 += line_size; | ||
259 | pix2 += line_size; | ||
260 | } | ||
261 | return s; | ||
262 | } | ||
263 | |||
264 | static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | ||
265 | { | ||
266 | int s, i; | ||
267 | uint32_t *sq = ff_squareTbl + 256; | ||
268 | |||
269 | s = 0; | ||
270 | for (i = 0; i < h; i++) { | ||
271 | s += sq[pix1[ 0] - pix2[ 0]]; | ||
272 | s += sq[pix1[ 1] - pix2[ 1]]; | ||
273 | s += sq[pix1[ 2] - pix2[ 2]]; | ||
274 | s += sq[pix1[ 3] - pix2[ 3]]; | ||
275 | s += sq[pix1[ 4] - pix2[ 4]]; | ||
276 | s += sq[pix1[ 5] - pix2[ 5]]; | ||
277 | s += sq[pix1[ 6] - pix2[ 6]]; | ||
278 | s += sq[pix1[ 7] - pix2[ 7]]; | ||
279 | s += sq[pix1[ 8] - pix2[ 8]]; | ||
280 | s += sq[pix1[ 9] - pix2[ 9]]; | ||
281 | s += sq[pix1[10] - pix2[10]]; | ||
282 | s += sq[pix1[11] - pix2[11]]; | ||
283 | s += sq[pix1[12] - pix2[12]]; | ||
284 | s += sq[pix1[13] - pix2[13]]; | ||
285 | s += sq[pix1[14] - pix2[14]]; | ||
286 | s += sq[pix1[15] - pix2[15]]; | ||
287 | |||
288 | pix1 += line_size; | ||
289 | pix2 += line_size; | ||
290 | } | ||
291 | return s; | ||
292 | } | ||
293 | |||
/* Draw the edges of width 'w' of an image of size width x height:
 * replicate the outermost rows/columns of the image into the padding
 * border around it. 'wrap' is the byte stride between rows. */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *ptr, *last_line;
    int i;

    /* top and bottom: replicate first/last row into the w border rows */
    last_line = buf + (height - 1) * wrap;
    for(i=0;i<w;i++) {
        memcpy(buf - (i + 1) * wrap, buf, width);
        memcpy(last_line + (i + 1) * wrap, last_line, width);
    }
    /* left and right: replicate first/last column of each row */
    ptr = buf;
    for(i=0;i<height;i++) {
        memset(ptr - w, ptr[0], w);
        memset(ptr + width, ptr[width-1], w);
        ptr += wrap;
    }
    /* corners: fill from the nearest image corner pixel */
    for(i=0;i<w;i++) {
        memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
        memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
        memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* bottom left */
        memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* bottom right */
    }
}
322 | |||
/**
 * Copy a rectangular area of samples to a temporary buffer and replicate the border samples.
 * @param buf destination buffer
 * @param src source buffer, pointing at the requested block position
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
 * @param block_w width of block
 * @param block_h height of block
 * @param src_x x coordinate of the top left sample of the block in the source buffer
 * @param src_y y coordinate of the top left sample of the block in the source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
 */
void ff_emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize, int block_w, int block_h,
                                    int src_x, int src_y, int w, int h){
    int x, y;
    int start_y, start_x, end_y, end_x;

    /* Clamp the requested origin so at least one source row/column overlaps
     * the block; move src by the same amount to stay consistent. */
    if (src_y >= h) {
        src   += (h - 1 - src_y) * linesize;
        src_y  = h - 1;
    } else if (src_y <= -block_h) {
        src   += (1 - block_h - src_y) * linesize;
        src_y  = 1 - block_h;
    }
    if (src_x >= w) {
        src   += w - 1 - src_x;
        src_x  = w - 1;
    } else if (src_x <= -block_w) {
        src   += 1 - block_w - src_x;
        src_x  = 1 - block_w;
    }

    /* Portion of the block that really lies inside the source image. */
    start_y = -src_y > 0 ? -src_y : 0;
    start_x = -src_x > 0 ? -src_x : 0;
    end_y   = h - src_y < block_h ? h - src_y : block_h;
    end_x   = w - src_x < block_w ? w - src_x : block_w;

    /* Copy the valid region verbatim. */
    for (y = start_y; y < end_y; y++)
        for (x = start_x; x < end_x; x++)
            buf[y * linesize + x] = src[y * linesize + x];

    /* Replicate the first valid row upwards... */
    for (y = 0; y < start_y; y++)
        for (x = start_x; x < end_x; x++)
            buf[y * linesize + x] = buf[start_y * linesize + x];

    /* ...and the last valid row downwards. */
    for (y = end_y; y < block_h; y++)
        for (x = start_x; x < end_x; x++)
            buf[y * linesize + x] = buf[(end_y - 1) * linesize + x];

    /* Replicate the leftmost/rightmost valid columns sideways. */
    for (y = 0; y < block_h; y++) {
        for (x = 0; x < start_x; x++)
            buf[y * linesize + x] = buf[y * linesize + start_x];
        for (x = end_x; x < block_w; x++)
            buf[y * linesize + x] = buf[y * linesize + end_x - 1];
    }
}
393 | |||
394 | static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size) | ||
395 | { | ||
396 | int i; | ||
397 | |||
398 | /* read the pixels */ | ||
399 | for(i=0;i<8;i++) { | ||
400 | block[0] = pixels[0]; | ||
401 | block[1] = pixels[1]; | ||
402 | block[2] = pixels[2]; | ||
403 | block[3] = pixels[3]; | ||
404 | block[4] = pixels[4]; | ||
405 | block[5] = pixels[5]; | ||
406 | block[6] = pixels[6]; | ||
407 | block[7] = pixels[7]; | ||
408 | pixels += line_size; | ||
409 | block += 8; | ||
410 | } | ||
411 | } | ||
412 | |||
413 | static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1, | ||
414 | const uint8_t *s2, int stride){ | ||
415 | int i; | ||
416 | |||
417 | /* read the pixels */ | ||
418 | for(i=0;i<8;i++) { | ||
419 | block[0] = s1[0] - s2[0]; | ||
420 | block[1] = s1[1] - s2[1]; | ||
421 | block[2] = s1[2] - s2[2]; | ||
422 | block[3] = s1[3] - s2[3]; | ||
423 | block[4] = s1[4] - s2[4]; | ||
424 | block[5] = s1[5] - s2[5]; | ||
425 | block[6] = s1[6] - s2[6]; | ||
426 | block[7] = s1[7] - s2[7]; | ||
427 | s1 += stride; | ||
428 | s2 += stride; | ||
429 | block += 8; | ||
430 | } | ||
431 | } | ||
432 | |||
433 | |||
434 | static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, | ||
435 | int line_size) | ||
436 | { | ||
437 | int i; | ||
438 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | ||
439 | |||
440 | /* read the pixels */ | ||
441 | for(i=0;i<8;i++) { | ||
442 | pixels[0] = cm[block[0]]; | ||
443 | pixels[1] = cm[block[1]]; | ||
444 | pixels[2] = cm[block[2]]; | ||
445 | pixels[3] = cm[block[3]]; | ||
446 | pixels[4] = cm[block[4]]; | ||
447 | pixels[5] = cm[block[5]]; | ||
448 | pixels[6] = cm[block[6]]; | ||
449 | pixels[7] = cm[block[7]]; | ||
450 | |||
451 | pixels += line_size; | ||
452 | block += 8; | ||
453 | } | ||
454 | } | ||
455 | |||
456 | static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, | ||
457 | int line_size) | ||
458 | { | ||
459 | int i; | ||
460 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | ||
461 | |||
462 | /* read the pixels */ | ||
463 | for(i=0;i<4;i++) { | ||
464 | pixels[0] = cm[block[0]]; | ||
465 | pixels[1] = cm[block[1]]; | ||
466 | pixels[2] = cm[block[2]]; | ||
467 | pixels[3] = cm[block[3]]; | ||
468 | |||
469 | pixels += line_size; | ||
470 | block += 8; | ||
471 | } | ||
472 | } | ||
473 | |||
474 | static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels, | ||
475 | int line_size) | ||
476 | { | ||
477 | int i; | ||
478 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | ||
479 | |||
480 | /* read the pixels */ | ||
481 | for(i=0;i<2;i++) { | ||
482 | pixels[0] = cm[block[0]]; | ||
483 | pixels[1] = cm[block[1]]; | ||
484 | |||
485 | pixels += line_size; | ||
486 | block += 8; | ||
487 | } | ||
488 | } | ||
489 | |||
490 | static void put_signed_pixels_clamped_c(const DCTELEM *block, | ||
491 | uint8_t *restrict pixels, | ||
492 | int line_size) | ||
493 | { | ||
494 | int i, j; | ||
495 | |||
496 | for (i = 0; i < 8; i++) { | ||
497 | for (j = 0; j < 8; j++) { | ||
498 | if (*block < -128) | ||
499 | *pixels = 0; | ||
500 | else if (*block > 127) | ||
501 | *pixels = 255; | ||
502 | else | ||
503 | *pixels = (uint8_t)(*block + 128); | ||
504 | block++; | ||
505 | pixels++; | ||
506 | } | ||
507 | pixels += (line_size - 8); | ||
508 | } | ||
509 | } | ||
510 | |||
511 | static void put_pixels_nonclamped_c(const DCTELEM *block, uint8_t *restrict pixels, | ||
512 | int line_size) | ||
513 | { | ||
514 | int i; | ||
515 | |||
516 | /* read the pixels */ | ||
517 | for(i=0;i<8;i++) { | ||
518 | pixels[0] = block[0]; | ||
519 | pixels[1] = block[1]; | ||
520 | pixels[2] = block[2]; | ||
521 | pixels[3] = block[3]; | ||
522 | pixels[4] = block[4]; | ||
523 | pixels[5] = block[5]; | ||
524 | pixels[6] = block[6]; | ||
525 | pixels[7] = block[7]; | ||
526 | |||
527 | pixels += line_size; | ||
528 | block += 8; | ||
529 | } | ||
530 | } | ||
531 | |||
532 | static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, | ||
533 | int line_size) | ||
534 | { | ||
535 | int i; | ||
536 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | ||
537 | |||
538 | /* read the pixels */ | ||
539 | for(i=0;i<8;i++) { | ||
540 | pixels[0] = cm[pixels[0] + block[0]]; | ||
541 | pixels[1] = cm[pixels[1] + block[1]]; | ||
542 | pixels[2] = cm[pixels[2] + block[2]]; | ||
543 | pixels[3] = cm[pixels[3] + block[3]]; | ||
544 | pixels[4] = cm[pixels[4] + block[4]]; | ||
545 | pixels[5] = cm[pixels[5] + block[5]]; | ||
546 | pixels[6] = cm[pixels[6] + block[6]]; | ||
547 | pixels[7] = cm[pixels[7] + block[7]]; | ||
548 | pixels += line_size; | ||
549 | block += 8; | ||
550 | } | ||
551 | } | ||
552 | |||
553 | static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, | ||
554 | int line_size) | ||
555 | { | ||
556 | int i; | ||
557 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | ||
558 | |||
559 | /* read the pixels */ | ||
560 | for(i=0;i<4;i++) { | ||
561 | pixels[0] = cm[pixels[0] + block[0]]; | ||
562 | pixels[1] = cm[pixels[1] + block[1]]; | ||
563 | pixels[2] = cm[pixels[2] + block[2]]; | ||
564 | pixels[3] = cm[pixels[3] + block[3]]; | ||
565 | pixels += line_size; | ||
566 | block += 8; | ||
567 | } | ||
568 | } | ||
569 | |||
570 | static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels, | ||
571 | int line_size) | ||
572 | { | ||
573 | int i; | ||
574 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | ||
575 | |||
576 | /* read the pixels */ | ||
577 | for(i=0;i<2;i++) { | ||
578 | pixels[0] = cm[pixels[0] + block[0]]; | ||
579 | pixels[1] = cm[pixels[1] + block[1]]; | ||
580 | pixels += line_size; | ||
581 | block += 8; | ||
582 | } | ||
583 | } | ||
584 | |||
585 | static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size) | ||
586 | { | ||
587 | int i; | ||
588 | for(i=0;i<8;i++) { | ||
589 | pixels[0] += block[0]; | ||
590 | pixels[1] += block[1]; | ||
591 | pixels[2] += block[2]; | ||
592 | pixels[3] += block[3]; | ||
593 | pixels[4] += block[4]; | ||
594 | pixels[5] += block[5]; | ||
595 | pixels[6] += block[6]; | ||
596 | pixels[7] += block[7]; | ||
597 | pixels += line_size; | ||
598 | block += 8; | ||
599 | } | ||
600 | } | ||
601 | |||
602 | static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size) | ||
603 | { | ||
604 | int i; | ||
605 | for(i=0;i<4;i++) { | ||
606 | pixels[0] += block[0]; | ||
607 | pixels[1] += block[1]; | ||
608 | pixels[2] += block[2]; | ||
609 | pixels[3] += block[3]; | ||
610 | pixels += line_size; | ||
611 | block += 4; | ||
612 | } | ||
613 | } | ||
614 | |||
615 | static int sum_abs_dctelem_c(DCTELEM *block) | ||
616 | { | ||
617 | int sum=0, i; | ||
618 | for(i=0; i<64; i++) | ||
619 | sum+= FFABS(block[i]); | ||
620 | return sum; | ||
621 | } | ||
622 | |||
/* Fill an h-row, 16-pixel-wide block with a constant value; line_size is
 * the byte stride between rows. */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    int row;

    for (row = 0; row < h; row++, block += line_size)
        memset(block, value, 16);
}
632 | |||
/* Fill an h-row, 8-pixel-wide block with a constant value; line_size is
 * the byte stride between rows. */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    int row;

    for (row = 0; row < h; row++, block += line_size)
        memset(block, value, 8);
}
642 | |||
/* 2x upscale of an 8x8 block: each source pixel becomes a 2x2 square in
 * dst. Multiplying a byte by 0x0101 and storing it as uint16_t duplicates
 * it horizontally; writing the same value to two row pointers duplicates
 * it vertically. */
static void scale_block_c(const uint8_t src[64]/*align 8*/, uint8_t *dst/*align 8*/, int linesize)
{
    int row, col;
    uint16_t *even = (uint16_t *) dst;
    uint16_t *odd  = (uint16_t *)(dst + linesize);

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++) {
            uint16_t doubled = src[col] * 0x0101;
            even[col] = doubled;
            odd[col]  = doubled;
        }
        src += 8;
        /* Advancing a uint16_t* by linesize elements skips 2*linesize
         * bytes, i.e. the two output rows just written. */
        even += linesize;
        odd  += linesize;
    }
}
658 | |||
659 | #if 0 | ||
660 | |||
661 | #define PIXOP2(OPNAME, OP) \ | ||
662 | static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | ||
663 | {\ | ||
664 | int i;\ | ||
665 | for(i=0; i<h; i++){\ | ||
666 | OP(*((uint64_t*)block), AV_RN64(pixels));\ | ||
667 | pixels+=line_size;\ | ||
668 | block +=line_size;\ | ||
669 | }\ | ||
670 | }\ | ||
671 | \ | ||
672 | static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | ||
673 | {\ | ||
674 | int i;\ | ||
675 | for(i=0; i<h; i++){\ | ||
676 | const uint64_t a= AV_RN64(pixels );\ | ||
677 | const uint64_t b= AV_RN64(pixels+1);\ | ||
678 | OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ | ||
679 | pixels+=line_size;\ | ||
680 | block +=line_size;\ | ||
681 | }\ | ||
682 | }\ | ||
683 | \ | ||
684 | static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | ||
685 | {\ | ||
686 | int i;\ | ||
687 | for(i=0; i<h; i++){\ | ||
688 | const uint64_t a= AV_RN64(pixels );\ | ||
689 | const uint64_t b= AV_RN64(pixels+1);\ | ||
690 | OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ | ||
691 | pixels+=line_size;\ | ||
692 | block +=line_size;\ | ||
693 | }\ | ||
694 | }\ | ||
695 | \ | ||
696 | static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | ||
697 | {\ | ||
698 | int i;\ | ||
699 | for(i=0; i<h; i++){\ | ||
700 | const uint64_t a= AV_RN64(pixels );\ | ||
701 | const uint64_t b= AV_RN64(pixels+line_size);\ | ||
702 | OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ | ||
703 | pixels+=line_size;\ | ||
704 | block +=line_size;\ | ||
705 | }\ | ||
706 | }\ | ||
707 | \ | ||
708 | static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | ||
709 | {\ | ||
710 | int i;\ | ||
711 | for(i=0; i<h; i++){\ | ||
712 | const uint64_t a= AV_RN64(pixels );\ | ||
713 | const uint64_t b= AV_RN64(pixels+line_size);\ | ||
714 | OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ | ||
715 | pixels+=line_size;\ | ||
716 | block +=line_size;\ | ||
717 | }\ | ||
718 | }\ | ||
719 | \ | ||
720 | static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | ||
721 | {\ | ||
722 | int i;\ | ||
723 | const uint64_t a= AV_RN64(pixels );\ | ||
724 | const uint64_t b= AV_RN64(pixels+1);\ | ||
725 | uint64_t l0= (a&0x0303030303030303ULL)\ | ||
726 | + (b&0x0303030303030303ULL)\ | ||
727 | + 0x0202020202020202ULL;\ | ||
728 | uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | ||
729 | + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | ||
730 | uint64_t l1,h1;\ | ||
731 | \ | ||
732 | pixels+=line_size;\ | ||
733 | for(i=0; i<h; i+=2){\ | ||
734 | uint64_t a= AV_RN64(pixels );\ | ||
735 | uint64_t b= AV_RN64(pixels+1);\ | ||
736 | l1= (a&0x0303030303030303ULL)\ | ||
737 | + (b&0x0303030303030303ULL);\ | ||
738 | h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | ||
739 | + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | ||
740 | OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ | ||
741 | pixels+=line_size;\ | ||
742 | block +=line_size;\ | ||
743 | a= AV_RN64(pixels );\ | ||
744 | b= AV_RN64(pixels+1);\ | ||
745 | l0= (a&0x0303030303030303ULL)\ | ||
746 | + (b&0x0303030303030303ULL)\ | ||
747 | + 0x0202020202020202ULL;\ | ||
748 | h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | ||
749 | + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | ||
750 | OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ | ||
751 | pixels+=line_size;\ | ||
752 | block +=line_size;\ | ||
753 | }\ | ||
754 | }\ | ||
755 | \ | ||
756 | static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | ||
757 | {\ | ||
758 | int i;\ | ||
759 | const uint64_t a= AV_RN64(pixels );\ | ||
760 | const uint64_t b= AV_RN64(pixels+1);\ | ||
761 | uint64_t l0= (a&0x0303030303030303ULL)\ | ||
762 | + (b&0x0303030303030303ULL)\ | ||
763 | + 0x0101010101010101ULL;\ | ||
764 | uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | ||
765 | + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | ||
766 | uint64_t l1,h1;\ | ||
767 | \ | ||
768 | pixels+=line_size;\ | ||
769 | for(i=0; i<h; i+=2){\ | ||
770 | uint64_t a= AV_RN64(pixels );\ | ||
771 | uint64_t b= AV_RN64(pixels+1);\ | ||
772 | l1= (a&0x0303030303030303ULL)\ | ||
773 | + (b&0x0303030303030303ULL);\ | ||
774 | h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | ||
775 | + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | ||
776 | OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ | ||
777 | pixels+=line_size;\ | ||
778 | block +=line_size;\ | ||
779 | a= AV_RN64(pixels );\ | ||
780 | b= AV_RN64(pixels+1);\ | ||
781 | l0= (a&0x0303030303030303ULL)\ | ||
782 | + (b&0x0303030303030303ULL)\ | ||
783 | + 0x0101010101010101ULL;\ | ||
784 | h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | ||
785 | + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | ||
786 | OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ | ||
787 | pixels+=line_size;\ | ||
788 | block +=line_size;\ | ||
789 | }\ | ||
790 | }\ | ||
791 | \ | ||
792 | CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8)\ | ||
793 | CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\ | ||
794 | CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\ | ||
795 | CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\ | ||
796 | CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\ | ||
797 | CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\ | ||
798 | CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8) | ||
799 | |||
800 | #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) ) | ||
801 | #else // 64 bit variant | ||
802 | |||
803 | #define PIXOP2(OPNAME, OP) \ | ||
804 | static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | ||
805 | int i;\ | ||
806 | for(i=0; i<h; i++){\ | ||
807 | OP(*((uint16_t*)(block )), AV_RN16(pixels ));\ | ||
808 | pixels+=line_size;\ | ||
809 | block +=line_size;\ | ||
810 | }\ | ||
811 | }\ | ||
812 | static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | ||
813 | int i;\ | ||
814 | for(i=0; i<h; i++){\ | ||
815 | OP(*((uint32_t*)(block )), AV_RN32(pixels ));\ | ||
816 | pixels+=line_size;\ | ||
817 | block +=line_size;\ | ||
818 | }\ | ||
819 | }\ | ||
820 | static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | ||
821 | int i;\ | ||
822 | for(i=0; i<h; i++){\ | ||
823 | OP(*((uint32_t*)(block )), AV_RN32(pixels ));\ | ||
824 | OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\ | ||
825 | pixels+=line_size;\ | ||
826 | block +=line_size;\ | ||
827 | }\ | ||
828 | }\ | ||
829 | static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | ||
830 | OPNAME ## _pixels8_c(block, pixels, line_size, h);\ | ||
831 | }\ | ||
832 | \ | ||
833 | static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ | ||
834 | int src_stride1, int src_stride2, int h){\ | ||
835 | int i;\ | ||
836 | for(i=0; i<h; i++){\ | ||
837 | uint32_t a,b;\ | ||
838 | a= AV_RN32(&src1[i*src_stride1 ]);\ | ||
839 | b= AV_RN32(&src2[i*src_stride2 ]);\ | ||
840 | OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\ | ||
841 | a= AV_RN32(&src1[i*src_stride1+4]);\ | ||
842 | b= AV_RN32(&src2[i*src_stride2+4]);\ | ||
843 | OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\ | ||
844 | }\ | ||
845 | }\ | ||
846 | \ | ||
847 | static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ | ||
848 | int src_stride1, int src_stride2, int h){\ | ||
849 | int i;\ | ||
850 | for(i=0; i<h; i++){\ | ||
851 | uint32_t a,b;\ | ||
852 | a= AV_RN32(&src1[i*src_stride1 ]);\ | ||
853 | b= AV_RN32(&src2[i*src_stride2 ]);\ | ||
854 | OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ | ||
855 | a= AV_RN32(&src1[i*src_stride1+4]);\ | ||
856 | b= AV_RN32(&src2[i*src_stride2+4]);\ | ||
857 | OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\ | ||
858 | }\ | ||
859 | }\ | ||
860 | \ | ||
861 | static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ | ||
862 | int src_stride1, int src_stride2, int h){\ | ||
863 | int i;\ | ||
864 | for(i=0; i<h; i++){\ | ||
865 | uint32_t a,b;\ | ||
866 | a= AV_RN32(&src1[i*src_stride1 ]);\ | ||
867 | b= AV_RN32(&src2[i*src_stride2 ]);\ | ||
868 | OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ | ||
869 | }\ | ||
870 | }\ | ||
871 | \ | ||
872 | static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ | ||
873 | int src_stride1, int src_stride2, int h){\ | ||
874 | int i;\ | ||
875 | for(i=0; i<h; i++){\ | ||
876 | uint32_t a,b;\ | ||
877 | a= AV_RN16(&src1[i*src_stride1 ]);\ | ||
878 | b= AV_RN16(&src2[i*src_stride2 ]);\ | ||
879 | OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ | ||
880 | }\ | ||
881 | }\ | ||
882 | \ | ||
883 | static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ | ||
884 | int src_stride1, int src_stride2, int h){\ | ||
885 | OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\ | ||
886 | OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\ | ||
887 | }\ | ||
888 | \ | ||
889 | static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ | ||
890 | int src_stride1, int src_stride2, int h){\ | ||
891 | OPNAME ## _no_rnd_pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\ | ||
892 | OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\ | ||
893 | }\ | ||
894 | \ | ||
895 | static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | ||
896 | OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ | ||
897 | }\ | ||
898 | \ | ||
899 | static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | ||
900 | OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ | ||
901 | }\ | ||
902 | \ | ||
903 | static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | ||
904 | OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ | ||
905 | }\ | ||
906 | \ | ||
907 | static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | ||
908 | OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ | ||
909 | }\ | ||
910 | \ | ||
911 | static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\ | ||
912 | int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | ||
913 | int i;\ | ||
914 | for(i=0; i<h; i++){\ | ||
915 | uint32_t a, b, c, d, l0, l1, h0, h1;\ | ||
916 | a= AV_RN32(&src1[i*src_stride1]);\ | ||
917 | b= AV_RN32(&src2[i*src_stride2]);\ | ||
918 | c= AV_RN32(&src3[i*src_stride3]);\ | ||
919 | d= AV_RN32(&src4[i*src_stride4]);\ | ||
920 | l0= (a&0x03030303UL)\ | ||
921 | + (b&0x03030303UL)\ | ||
922 | + 0x02020202UL;\ | ||
923 | h0= ((a&0xFCFCFCFCUL)>>2)\ | ||
924 | + ((b&0xFCFCFCFCUL)>>2);\ | ||
925 | l1= (c&0x03030303UL)\ | ||
926 | + (d&0x03030303UL);\ | ||
927 | h1= ((c&0xFCFCFCFCUL)>>2)\ | ||
928 | + ((d&0xFCFCFCFCUL)>>2);\ | ||
929 | OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | ||
930 | a= AV_RN32(&src1[i*src_stride1+4]);\ | ||
931 | b= AV_RN32(&src2[i*src_stride2+4]);\ | ||
932 | c= AV_RN32(&src3[i*src_stride3+4]);\ | ||
933 | d= AV_RN32(&src4[i*src_stride4+4]);\ | ||
934 | l0= (a&0x03030303UL)\ | ||
935 | + (b&0x03030303UL)\ | ||
936 | + 0x02020202UL;\ | ||
937 | h0= ((a&0xFCFCFCFCUL)>>2)\ | ||
938 | + ((b&0xFCFCFCFCUL)>>2);\ | ||
939 | l1= (c&0x03030303UL)\ | ||
940 | + (d&0x03030303UL);\ | ||
941 | h1= ((c&0xFCFCFCFCUL)>>2)\ | ||
942 | + ((d&0xFCFCFCFCUL)>>2);\ | ||
943 | OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | ||
944 | }\ | ||
945 | }\ | ||
946 | \ | ||
947 | static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | ||
948 | OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ | ||
949 | }\ | ||
950 | \ | ||
951 | static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | ||
952 | OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ | ||
953 | }\ | ||
954 | \ | ||
955 | static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | ||
956 | OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ | ||
957 | }\ | ||
958 | \ | ||
959 | static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | ||
960 | OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ | ||
961 | }\ | ||
962 | \ | ||
963 | static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\ | ||
964 | int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | ||
965 | int i;\ | ||
966 | for(i=0; i<h; i++){\ | ||
967 | uint32_t a, b, c, d, l0, l1, h0, h1;\ | ||
968 | a= AV_RN32(&src1[i*src_stride1]);\ | ||
969 | b= AV_RN32(&src2[i*src_stride2]);\ | ||
970 | c= AV_RN32(&src3[i*src_stride3]);\ | ||
971 | d= AV_RN32(&src4[i*src_stride4]);\ | ||
972 | l0= (a&0x03030303UL)\ | ||
973 | + (b&0x03030303UL)\ | ||
974 | + 0x01010101UL;\ | ||
975 | h0= ((a&0xFCFCFCFCUL)>>2)\ | ||
976 | + ((b&0xFCFCFCFCUL)>>2);\ | ||
977 | l1= (c&0x03030303UL)\ | ||
978 | + (d&0x03030303UL);\ | ||
979 | h1= ((c&0xFCFCFCFCUL)>>2)\ | ||
980 | + ((d&0xFCFCFCFCUL)>>2);\ | ||
981 | OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | ||
982 | a= AV_RN32(&src1[i*src_stride1+4]);\ | ||
983 | b= AV_RN32(&src2[i*src_stride2+4]);\ | ||
984 | c= AV_RN32(&src3[i*src_stride3+4]);\ | ||
985 | d= AV_RN32(&src4[i*src_stride4+4]);\ | ||
986 | l0= (a&0x03030303UL)\ | ||
987 | + (b&0x03030303UL)\ | ||
988 | + 0x01010101UL;\ | ||
989 | h0= ((a&0xFCFCFCFCUL)>>2)\ | ||
990 | + ((b&0xFCFCFCFCUL)>>2);\ | ||
991 | l1= (c&0x03030303UL)\ | ||
992 | + (d&0x03030303UL);\ | ||
993 | h1= ((c&0xFCFCFCFCUL)>>2)\ | ||
994 | + ((d&0xFCFCFCFCUL)>>2);\ | ||
995 | OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | ||
996 | }\ | ||
997 | }\ | ||
998 | static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\ | ||
999 | int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | ||
1000 | OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | ||
1001 | OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | ||
1002 | }\ | ||
1003 | static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\ | ||
1004 | int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | ||
1005 | OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | ||
1006 | OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | ||
1007 | }\ | ||
1008 | \ | ||
1009 | static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | ||
1010 | {\ | ||
1011 | int i, a0, b0, a1, b1;\ | ||
1012 | a0= pixels[0];\ | ||
1013 | b0= pixels[1] + 2;\ | ||
1014 | a0 += b0;\ | ||
1015 | b0 += pixels[2];\ | ||
1016 | \ | ||
1017 | pixels+=line_size;\ | ||
1018 | for(i=0; i<h; i+=2){\ | ||
1019 | a1= pixels[0];\ | ||
1020 | b1= pixels[1];\ | ||
1021 | a1 += b1;\ | ||
1022 | b1 += pixels[2];\ | ||
1023 | \ | ||
1024 | block[0]= (a1+a0)>>2; /* FIXME non put */\ | ||
1025 | block[1]= (b1+b0)>>2;\ | ||
1026 | \ | ||
1027 | pixels+=line_size;\ | ||
1028 | block +=line_size;\ | ||
1029 | \ | ||
1030 | a0= pixels[0];\ | ||
1031 | b0= pixels[1] + 2;\ | ||
1032 | a0 += b0;\ | ||
1033 | b0 += pixels[2];\ | ||
1034 | \ | ||
1035 | block[0]= (a1+a0)>>2;\ | ||
1036 | block[1]= (b1+b0)>>2;\ | ||
1037 | pixels+=line_size;\ | ||
1038 | block +=line_size;\ | ||
1039 | }\ | ||
1040 | }\ | ||
1041 | \ | ||
1042 | static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | ||
1043 | {\ | ||
1044 | int i;\ | ||
1045 | const uint32_t a= AV_RN32(pixels );\ | ||
1046 | const uint32_t b= AV_RN32(pixels+1);\ | ||
1047 | uint32_t l0= (a&0x03030303UL)\ | ||
1048 | + (b&0x03030303UL)\ | ||
1049 | + 0x02020202UL;\ | ||
1050 | uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ | ||
1051 | + ((b&0xFCFCFCFCUL)>>2);\ | ||
1052 | uint32_t l1,h1;\ | ||
1053 | \ | ||
1054 | pixels+=line_size;\ | ||
1055 | for(i=0; i<h; i+=2){\ | ||
1056 | uint32_t a= AV_RN32(pixels );\ | ||
1057 | uint32_t b= AV_RN32(pixels+1);\ | ||
1058 | l1= (a&0x03030303UL)\ | ||
1059 | + (b&0x03030303UL);\ | ||
1060 | h1= ((a&0xFCFCFCFCUL)>>2)\ | ||
1061 | + ((b&0xFCFCFCFCUL)>>2);\ | ||
1062 | OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | ||
1063 | pixels+=line_size;\ | ||
1064 | block +=line_size;\ | ||
1065 | a= AV_RN32(pixels );\ | ||
1066 | b= AV_RN32(pixels+1);\ | ||
1067 | l0= (a&0x03030303UL)\ | ||
1068 | + (b&0x03030303UL)\ | ||
1069 | + 0x02020202UL;\ | ||
1070 | h0= ((a&0xFCFCFCFCUL)>>2)\ | ||
1071 | + ((b&0xFCFCFCFCUL)>>2);\ | ||
1072 | OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | ||
1073 | pixels+=line_size;\ | ||
1074 | block +=line_size;\ | ||
1075 | }\ | ||
1076 | }\ | ||
1077 | \ | ||
1078 | static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | ||
1079 | {\ | ||
1080 | int j;\ | ||
1081 | for(j=0; j<2; j++){\ | ||
1082 | int i;\ | ||
1083 | const uint32_t a= AV_RN32(pixels );\ | ||
1084 | const uint32_t b= AV_RN32(pixels+1);\ | ||
1085 | uint32_t l0= (a&0x03030303UL)\ | ||
1086 | + (b&0x03030303UL)\ | ||
1087 | + 0x02020202UL;\ | ||
1088 | uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ | ||
1089 | + ((b&0xFCFCFCFCUL)>>2);\ | ||
1090 | uint32_t l1,h1;\ | ||
1091 | \ | ||
1092 | pixels+=line_size;\ | ||
1093 | for(i=0; i<h; i+=2){\ | ||
1094 | uint32_t a= AV_RN32(pixels );\ | ||
1095 | uint32_t b= AV_RN32(pixels+1);\ | ||
1096 | l1= (a&0x03030303UL)\ | ||
1097 | + (b&0x03030303UL);\ | ||
1098 | h1= ((a&0xFCFCFCFCUL)>>2)\ | ||
1099 | + ((b&0xFCFCFCFCUL)>>2);\ | ||
1100 | OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | ||
1101 | pixels+=line_size;\ | ||
1102 | block +=line_size;\ | ||
1103 | a= AV_RN32(pixels );\ | ||
1104 | b= AV_RN32(pixels+1);\ | ||
1105 | l0= (a&0x03030303UL)\ | ||
1106 | + (b&0x03030303UL)\ | ||
1107 | + 0x02020202UL;\ | ||
1108 | h0= ((a&0xFCFCFCFCUL)>>2)\ | ||
1109 | + ((b&0xFCFCFCFCUL)>>2);\ | ||
1110 | OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | ||
1111 | pixels+=line_size;\ | ||
1112 | block +=line_size;\ | ||
1113 | }\ | ||
1114 | pixels+=4-line_size*(h+1);\ | ||
1115 | block +=4-line_size*h;\ | ||
1116 | }\ | ||
1117 | }\ | ||
1118 | \ | ||
1119 | static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | ||
1120 | {\ | ||
1121 | int j;\ | ||
1122 | for(j=0; j<2; j++){\ | ||
1123 | int i;\ | ||
1124 | const uint32_t a= AV_RN32(pixels );\ | ||
1125 | const uint32_t b= AV_RN32(pixels+1);\ | ||
1126 | uint32_t l0= (a&0x03030303UL)\ | ||
1127 | + (b&0x03030303UL)\ | ||
1128 | + 0x01010101UL;\ | ||
1129 | uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ | ||
1130 | + ((b&0xFCFCFCFCUL)>>2);\ | ||
1131 | uint32_t l1,h1;\ | ||
1132 | \ | ||
1133 | pixels+=line_size;\ | ||
1134 | for(i=0; i<h; i+=2){\ | ||
1135 | uint32_t a= AV_RN32(pixels );\ | ||
1136 | uint32_t b= AV_RN32(pixels+1);\ | ||
1137 | l1= (a&0x03030303UL)\ | ||
1138 | + (b&0x03030303UL);\ | ||
1139 | h1= ((a&0xFCFCFCFCUL)>>2)\ | ||
1140 | + ((b&0xFCFCFCFCUL)>>2);\ | ||
1141 | OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | ||
1142 | pixels+=line_size;\ | ||
1143 | block +=line_size;\ | ||
1144 | a= AV_RN32(pixels );\ | ||
1145 | b= AV_RN32(pixels+1);\ | ||
1146 | l0= (a&0x03030303UL)\ | ||
1147 | + (b&0x03030303UL)\ | ||
1148 | + 0x01010101UL;\ | ||
1149 | h0= ((a&0xFCFCFCFCUL)>>2)\ | ||
1150 | + ((b&0xFCFCFCFCUL)>>2);\ | ||
1151 | OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | ||
1152 | pixels+=line_size;\ | ||
1153 | block +=line_size;\ | ||
1154 | }\ | ||
1155 | pixels+=4-line_size*(h+1);\ | ||
1156 | block +=4-line_size*h;\ | ||
1157 | }\ | ||
1158 | }\ | ||
1159 | \ | ||
1160 | CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\ | ||
1161 | CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\ | ||
1162 | CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\ | ||
1163 | CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\ | ||
1164 | CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\ | ||
1165 | CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\ | ||
1166 | CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\ | ||
1167 | CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\ | ||
1168 | |||
/* Per-byte rounded average of two 32-bit packed pixel words; plugged into
   the PIXOP2 template to build the "avg" function family. */
#define op_avg(a, b) a = rnd_avg32(a, b)
#endif /* NOTE(review): closes a conditional opened earlier in the file */
/* Plain store; plugged into PIXOP2 to build the "put" function family. */
#define op_put(a, b) a = b

PIXOP2(avg, op_avg)
PIXOP2(put, op_put)
#undef op_avg
#undef op_put

/* Scalar rounded averages of 2 and 4 pixel values. */
#define avg2(a,b) ((a+b+1)>>1)
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
1180 | |||
/* Width-16 two-source no-rounding blend with one shared stride for dst and
   both sources; thin wrapper around the PIXOP2-generated three-stride form. */
static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h);
}
1184 | |||
/* Width-8 two-source no-rounding blend with one shared stride; thin wrapper
   around the PIXOP2-generated three-stride form. */
static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h);
}
1188 | |||
/*
 * 1/16-pel bilinear interpolation over an 8-pixel-wide block of h lines.
 * (x16, y16) are the sub-pel offsets in 1/16 units; the four corner weights
 * A..D sum to 256, so adding `rounder` and shifting right by 8 renormalizes.
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int A = (16 - x16) * (16 - y16);
    const int B = (     x16) * (16 - y16);
    const int C = (16 - x16) * (     y16);
    const int D = (     x16) * (     y16);
    int line, col;

    for (line = 0; line < h; line++) {
        for (col = 0; col < 8; col++) {
            dst[col] = (A * src[col]          + B * src[col + 1] +
                        C * src[stride + col] + D * src[stride + col + 1] +
                        rounder) >> 8;
        }
        dst += stride;
        src += stride;
    }
}
1211 | |||
/**
 * Global motion compensation: fills an 8-pixel-wide strip of h lines of dst
 * by sampling src at positions generated from the start point (ox, oy), the
 * per-pixel increments (dxx, dyx) and the per-line increments (dxy, dyy).
 *
 * Coordinates carry 16 fractional bits; after >>16 the low `shift` bits form
 * the sub-pel fraction for the bilinear blend, `r` is the rounding bias, and
 * the blended sum is renormalized with >>(shift*2). Samples falling outside
 * the source are clamped to the nearest edge pixel via av_clip, degrading to
 * 1-D or nearest-neighbour interpolation at the borders.
 */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;

    /* switch to inclusive maxima so the (unsigned) compares below also
       reject negative coordinates in a single test */
    width--;
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){ //XXX FIXME optimize
            int src_x, src_y, frac_x, frac_y, index;

            src_x= vx>>16;
            src_y= vy>>16;
            /* low `shift` bits are the sub-pel fraction; the rest is the
               integer sample coordinate */
            frac_x= src_x&(s-1);
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    /* fully inside: 2-D bilinear blend of the 2x2 neighbourhood */
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= ( ( src[index         ]*(s-frac_x)
                                         + src[index       +1]*   frac_x )*(s-frac_y)
                                       + ( src[index+stride  ]*(s-frac_x)
                                         + src[index+stride+1]*   frac_x )*   frac_y
                                       + r)>>(shift*2);
                }else{
                    /* y out of range: clamp the row, blend horizontally only */
                    index= src_x + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( ( src[index         ]*(s-frac_x)
                                         + src[index       +1]*   frac_x )*s
                                       + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    /* x out of range: clamp the column, blend vertically only */
                    index= av_clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= ( ( src[index         ]*(s-frac_y)
                                         + src[index+stride  ]*   frac_y )*s
                                       + r)>>(shift*2);
                }else{
                    /* both out of range: nearest edge pixel, no blending */
                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]=    src[index         ];
                }
            }

            vx+= dxx;
            vy+= dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
1269 | |||
/* Full-pel (no interpolation) copy: dispatch to the PIXOP2-generated
   fixed-width put helper matching `width`; unsupported widths are a no-op. */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: put_pixels2_c (dst, src, stride, height); break;
    case 4: put_pixels4_c (dst, src, stride, height); break;
    case 8: put_pixels8_c (dst, src, stride, height); break;
    case 16:put_pixels16_c(dst, src, stride, height); break;
    }
}
1278 | |||
/* Thirdpel MC, offset (1/3, 0): horizontal blend round((2*a + b)/3),
   computed as (683*(2a + b + 1)) >> 11 since 683/2048 ~= 1/3. */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (683 * (2 * s[col] + s[col + 1] + 1)) >> 11;
    }
}
1289 | |||
/* Thirdpel MC, offset (2/3, 0): horizontal blend round((a + 2*b)/3). */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (683 * (s[col] + 2 * s[col + 1] + 1)) >> 11;
    }
}
1300 | |||
/* Thirdpel MC, offset (0, 1/3): vertical blend round((2*a + below)/3). */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (683 * (2 * s[col] + s[col + stride] + 1)) >> 11;
    }
}
1311 | |||
/* Thirdpel MC, offset (1/3, 1/3): 2x2 blend with weights 4/3/3/2 (sum 12),
   normalized via 2731/2^15 ~= 1/12. */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (2731 * (4 * s[col] + 3 * s[col + 1] +
                              3 * s[col + stride] + 2 * s[col + stride + 1] + 6)) >> 15;
    }
}
1322 | |||
/* Thirdpel MC, offset (1/3, 2/3): 2x2 blend with weights 3/2/4/3 (sum 12). */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (2731 * (3 * s[col] + 2 * s[col + 1] +
                              4 * s[col + stride] + 3 * s[col + stride + 1] + 6)) >> 15;
    }
}
1333 | |||
/* Thirdpel MC, offset (0, 2/3): vertical blend round((a + 2*below)/3). */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (683 * (s[col] + 2 * s[col + stride] + 1)) >> 11;
    }
}
1344 | |||
/* Thirdpel MC, offset (2/3, 1/3): 2x2 blend with weights 3/4/2/3 (sum 12). */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (2731 * (3 * s[col] + 4 * s[col + 1] +
                              2 * s[col + stride] + 3 * s[col + stride + 1] + 6)) >> 15;
    }
}
1355 | |||
/* Thirdpel MC, offset (2/3, 2/3): 2x2 blend with weights 2/3/3/4 (sum 12). */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (2731 * (2 * s[col] + 3 * s[col + 1] +
                              3 * s[col + stride] + 4 * s[col + stride + 1] + 6)) >> 15;
    }
}
1366 | |||
/* Full-pel (no interpolation) average: dispatch to the PIXOP2-generated
   fixed-width avg helper matching `width`; unsupported widths are a no-op. */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: avg_pixels2_c (dst, src, stride, height); break;
    case 4: avg_pixels4_c (dst, src, stride, height); break;
    case 8: avg_pixels8_c (dst, src, stride, height); break;
    case 16:avg_pixels16_c(dst, src, stride, height); break;
    }
}
1375 | |||
/* Thirdpel MC, offset (1/3, 0), averaging mode: compute the horizontal
   thirdpel prediction and round-average it into the existing dst pixels. */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++) {
            int pred = (683 * (2 * s[col] + s[col + 1] + 1)) >> 11;
            d[col] = (d[col] + pred + 1) >> 1;
        }
    }
}
1386 | |||
/* Thirdpel MC, offset (2/3, 0), averaging mode. */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++) {
            int pred = (683 * (s[col] + 2 * s[col + 1] + 1)) >> 11;
            d[col] = (d[col] + pred + 1) >> 1;
        }
    }
}
1397 | |||
/* Thirdpel MC, offset (0, 1/3), averaging mode. */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++) {
            int pred = (683 * (2 * s[col] + s[col + stride] + 1)) >> 11;
            d[col] = (d[col] + pred + 1) >> 1;
        }
    }
}
1408 | |||
/* Thirdpel MC, offset (1/3, 1/3), averaging mode: 2x2 weights 4/3/3/2. */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++) {
            int pred = (2731 * (4 * s[col] + 3 * s[col + 1] +
                                3 * s[col + stride] + 2 * s[col + stride + 1] + 6)) >> 15;
            d[col] = (d[col] + pred + 1) >> 1;
        }
    }
}
1419 | |||
/* Thirdpel MC, offset (1/3, 2/3), averaging mode: 2x2 weights 3/2/4/3. */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++) {
            int pred = (2731 * (3 * s[col] + 2 * s[col + 1] +
                                4 * s[col + stride] + 3 * s[col + stride + 1] + 6)) >> 15;
            d[col] = (d[col] + pred + 1) >> 1;
        }
    }
}
1430 | |||
/* Thirdpel MC, offset (0, 2/3), averaging mode. */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++) {
            int pred = (683 * (s[col] + 2 * s[col + stride] + 1)) >> 11;
            d[col] = (d[col] + pred + 1) >> 1;
        }
    }
}
1441 | |||
/* Thirdpel MC, offset (2/3, 1/3), averaging mode: 2x2 weights 3/4/2/3. */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++) {
            int pred = (2731 * (3 * s[col] + 4 * s[col + 1] +
                                2 * s[col + stride] + 3 * s[col + stride + 1] + 6)) >> 15;
            d[col] = (d[col] + pred + 1) >> 1;
        }
    }
}
1452 | |||
/* Thirdpel MC, offset (2/3, 2/3), averaging mode: 2x2 weights 2/3/3/4. */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++) {
            int pred = (2731 * (2 * s[col] + 3 * s[col + 1] +
                                3 * s[col + stride] + 4 * s[col + stride + 1] + 6)) >> 15;
            d[col] = (d[col] + pred + 1) >> 1;
        }
    }
}
#if 0
/* Disabled: width-specialized thirdpel wrappers.
   NOTE(review): as written each body prefixes the call with `void`, which is
   ill-formed — this block would not compile if ever re-enabled. */
#define TPEL_WIDTH(width)\
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
#endif
1484 | |||
/**
 * Template generating the 2/4/8-pixel-wide H.264 chroma MC functions.
 * A..D are the 2x2 bilinear weights for the sub-pel offset (x, y); they sum
 * to 64, and OP (op_put/op_avg, defined at the instantiation site) folds in
 * the rounding and the >>6 renormalization. When D == 0 at least one offset
 * is zero, so the 2-D blend collapses to a 1-D blend along rows (step = 1)
 * or columns (step = stride).
 */
#define H264_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            OP(dst[4], (A*src[4] + E*src[step+4]));\
            OP(dst[5], (A*src[5] + E*src[step+5]));\
            OP(dst[6], (A*src[6] + E*src[step+6]));\
            OP(dst[7], (A*src[7] + E*src[step+7]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}
1585 | |||
/* H.264 chroma rounding ops: the bilinear weights sum to 64, so OP adds 32
   and shifts right by 6; op_avg additionally round-averages with dst. */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put
1593 | |||
/*
 * VC-1 8-wide chroma MC, "no rounding" variant: 2x2 bilinear blend with
 * weights A..D (sum 64) and a reduced rounding bias of 32 - 4 = 28 before
 * the >>6 renormalization.
 */
static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    const int A = (8 - x) * (8 - y);
    const int B = (    x) * (8 - y);
    const int C = (8 - x) * (    y);
    const int D = (    x) * (    y);
    int line, col;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for (line = 0; line < h; line++) {
        for (col = 0; col < 8; col++) {
            dst[col] = (A * src[col]          + B * src[col + 1] +
                        C * src[stride + col] + D * src[stride + col + 1] +
                        32 - 4) >> 6;
        }
        dst += stride;
        src += stride;
    }
}
1617 | |||
/* VC-1 "no rounding" chroma motion compensation, averaging variant.
 *
 * Same bilinear eighth-pel interpolation as put_no_rnd_vc1_chroma_mc8_c
 * (weights sum to 64, bias 32 - 4 before the >>6), but the interpolated
 * pixel is combined with the value already present in dst through the
 * avg2 macro (defined elsewhere in this file), as used for
 * bi-directional prediction.
 */
static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    const int wTL = (8 - x) * (8 - y); /* top-left weight     */
    const int wTR = (    x) * (8 - y); /* top-right weight    */
    const int wBL = (8 - x) * (    y); /* bottom-left weight  */
    const int wBR = (    x) * (    y); /* bottom-right weight */
    int row, col;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++) {
            const int pel = (wTL * src[col]
                           + wTR * src[col + 1]
                           + wBL * src[stride + col]
                           + wBR * src[stride + col + 1]
                           + 32 - 4) >> 6;
            dst[col] = avg2(dst[col], pel);
        }
        dst += stride;
        src += stride;
    }
}
1641 | |||
/*
 * QPEL_MC(r, OPNAME, RND, OP) expands to the full family of MPEG-4
 * quarter-pel motion-compensation functions for 8x8 and 16x16 blocks:
 *
 *  - OPNAME##mpeg4_qpel{8,16}_{h,v}_lowpass: the MPEG-4 half-pel filter
 *    with taps (20, -6, 3, -1), applied horizontally or vertically; the
 *    result is stored through OP, which clamps via the cm lookup
 *    (cm points into ff_cropTbl, declared elsewhere).
 *  - OPNAME##qpel{8,16}_mcXY_c: the 16 quarter-pel positions, where X
 *    and Y (each 0..3) are the horizontal and vertical quarter-pel
 *    fractions.  They combine the lowpass filters with the bilinear
 *    helpers pixels{8,16}_l2/_l4 and the edge-extending block copies
 *    copy_block9/copy_block17 (all defined elsewhere in this file).
 *  - ff_##OPNAME##qpel*_old_c: historical variants kept for bitstream
 *    compatibility; they use a four-way average (pixels*_l4) instead of
 *    the newer two-stage filtering.
 *
 * OP is the store macro (put/avg, rounding or not); RND selects the
 * matching rounding flavour of the helper names via token pasting.
 *
 * NOTE(review): near the right/bottom block edge the filter reuses
 * interior samples (e.g. src[8] appears twice in the dst[5..7] rows)
 * instead of reading past the block.  This mirroring is the intended
 * MPEG-4 edge handling, not a typo — do not "fix" the indices.
 */
#define QPEL_MC(r, OPNAME, RND, OP) \
static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
        OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
        OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
        OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
        OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int src0= src[0*srcStride];\
        const int src1= src[1*srcStride];\
        const int src2= src[2*srcStride];\
        const int src3= src[3*srcStride];\
        const int src4= src[4*srcStride];\
        const int src5= src[5*srcStride];\
        const int src6= src[6*srcStride];\
        const int src7= src[7*srcStride];\
        const int src8= src[8*srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    \
    for(i=0; i<h; i++)\
    {\
        OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
        OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
        OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
        OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
        OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
        OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
        OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
        OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
        OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
        OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
        OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
        OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
        OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
        OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
        OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
        OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    const int w=16;\
    for(i=0; i<w; i++)\
    {\
        const int src0= src[0*srcStride];\
        const int src1= src[1*srcStride];\
        const int src2= src[2*srcStride];\
        const int src3= src[3*srcStride];\
        const int src4= src[4*srcStride];\
        const int src5= src[5*srcStride];\
        const int src6= src[6*srcStride];\
        const int src7= src[7*srcStride];\
        const int src8= src[8*srcStride];\
        const int src9= src[9*srcStride];\
        const int src10= src[10*srcStride];\
        const int src11= src[11*srcStride];\
        const int src12= src[12*srcStride];\
        const int src13= src[13*srcStride];\
        const int src14= src[14*srcStride];\
        const int src15= src[15*srcStride];\
        const int src16= src[16*srcStride];\
        OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
        OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
        OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
        OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
        OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
        OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
        OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
        OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
        OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
        OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
        OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
        OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
        OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
        OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
        OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
        OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
        dst++;\
        src++;\
    }\
}\
\
/* --- 8x8 quarter-pel positions: mcXY -> horizontal fraction X, vertical Y --- */\
static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels8_c(dst, src, stride, 8);\
}\
\
static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
    OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
}\
\
static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
    OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t half[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
    OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    copy_block9(full, src, 16, stride, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
}\
\
static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t half[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
    OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full  , 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[72];\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
/* --- 16x16 quarter-pel positions (same scheme as the 8x8 set above) --- */\
static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels16_c(dst, src, stride, 16);\
}\
\
static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
    OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
}\
\
static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
    OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t half[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
    OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    copy_block17(full, src, 24, stride, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
}\
\
static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t half[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
    OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full  , 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}\
static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[272];\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}
2124 | |||
/* Store macros for the QPEL_MC template.  The lowpass filter output is
 * a 5-bit fixed-point sum (taps 20, -6, 3, -1 scale by 16 after the
 * averaging helpers), so add 16 (rounding) or 15 (no-rounding) before
 * the >>5, then clamp to 0..255 through the cm (ff_cropTbl) lookup.
 * The avg variants additionally average with the existing dst pixel,
 * rounding up. */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]

/* Instantiate the put, put_no_rnd and avg quarter-pel function sets. */
QPEL_MC(0, put_ , _ , op_put)
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
QPEL_MC(0, avg_ , _ , op_avg)
//QPEL_MC(1, avg_no_rnd , _ , op_avg)
#undef op_avg
#undef op_avg_no_rnd
#undef op_put
#undef op_put_no_rnd
2138 | |||
2139 | #if 1 | ||
2140 | #define H264_LOWPASS(OPNAME, OP, OP2) \ | ||
2141 | static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | ||
2142 | const int h=2;\ | ||
2143 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ | ||
2144 | int i;\ | ||
2145 | for(i=0; i<h; i++)\ | ||
2146 | {\ | ||
2147 | OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\ | ||
2148 | OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\ | ||
2149 | dst+=dstStride;\ | ||
2150 | src+=srcStride;\ | ||
2151 | }\ | ||
2152 | }\ | ||
2153 | \ | ||
2154 | static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | ||
2155 | const int w=2;\ | ||
2156 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ | ||
2157 | int i;\ | ||
2158 | for(i=0; i<w; i++)\ | ||
2159 | {\ | ||
2160 | const int srcB= src[-2*srcStride];\ | ||
2161 | const int srcA= src[-1*srcStride];\ | ||
2162 | const int src0= src[0 *srcStride];\ | ||
2163 | const int src1= src[1 *srcStride];\ | ||
2164 | const int src2= src[2 *srcStride];\ | ||
2165 | const int src3= src[3 *srcStride];\ | ||
2166 | const int src4= src[4 *srcStride];\ | ||
2167 | OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ | ||
2168 | OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ | ||
2169 | dst++;\ | ||
2170 | src++;\ | ||
2171 | }\ | ||
2172 | }\ | ||
2173 | \ | ||
2174 | static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ | ||
2175 | const int h=2;\ | ||
2176 | const int w=2;\ | ||
2177 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ | ||
2178 | int i;\ | ||
2179 | src -= 2*srcStride;\ | ||
2180 | for(i=0; i<h+5; i++)\ | ||
2181 | {\ | ||
2182 | tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\ | ||
2183 | tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\ | ||
2184 | tmp+=tmpStride;\ | ||
2185 | src+=srcStride;\ | ||
2186 | }\ | ||
2187 | tmp -= tmpStride*(h+5-2);\ | ||
2188 | for(i=0; i<w; i++)\ | ||
2189 | {\ | ||
2190 | const int tmpB= tmp[-2*tmpStride];\ | ||
2191 | const int tmpA= tmp[-1*tmpStride];\ | ||
2192 | const int tmp0= tmp[0 *tmpStride];\ | ||
2193 | const int tmp1= tmp[1 *tmpStride];\ | ||
2194 | const int tmp2= tmp[2 *tmpStride];\ | ||
2195 | const int tmp3= tmp[3 *tmpStride];\ | ||
2196 | const int tmp4= tmp[4 *tmpStride];\ | ||
2197 | OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ | ||
2198 | OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ | ||
2199 | dst++;\ | ||
2200 | tmp++;\ | ||
2201 | }\ | ||
2202 | }\ | ||
2203 | static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | ||
2204 | const int h=4;\ | ||
2205 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ | ||
2206 | int i;\ | ||
2207 | for(i=0; i<h; i++)\ | ||
2208 | {\ | ||
2209 | OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\ | ||
2210 | OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\ | ||
2211 | OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\ | ||
2212 | OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\ | ||
2213 | dst+=dstStride;\ | ||
2214 | src+=srcStride;\ | ||
2215 | }\ | ||
2216 | }\ | ||
2217 | \ | ||
2218 | static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | ||
2219 | const int w=4;\ | ||
2220 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ | ||
2221 | int i;\ | ||
2222 | for(i=0; i<w; i++)\ | ||
2223 | {\ | ||
2224 | const int srcB= src[-2*srcStride];\ | ||
2225 | const int srcA= src[-1*srcStride];\ | ||
2226 | const int src0= src[0 *srcStride];\ | ||
2227 | const int src1= src[1 *srcStride];\ | ||
2228 | const int src2= src[2 *srcStride];\ | ||
2229 | const int src3= src[3 *srcStride];\ | ||
2230 | const int src4= src[4 *srcStride];\ | ||
2231 | const int src5= src[5 *srcStride];\ | ||
2232 | const int src6= src[6 *srcStride];\ | ||
2233 | OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ | ||
2234 | OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ | ||
2235 | OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\ | ||
2236 | OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\ | ||
2237 | dst++;\ | ||
2238 | src++;\ | ||
2239 | }\ | ||
2240 | }\ | ||
2241 | \ | ||
2242 | static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ | ||
2243 | const int h=4;\ | ||
2244 | const int w=4;\ | ||
2245 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ | ||
2246 | int i;\ | ||
2247 | src -= 2*srcStride;\ | ||
2248 | for(i=0; i<h+5; i++)\ | ||
2249 | {\ | ||
2250 | tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\ | ||
2251 | tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\ | ||
2252 | tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\ | ||
2253 | tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\ | ||
2254 | tmp+=tmpStride;\ | ||
2255 | src+=srcStride;\ | ||
2256 | }\ | ||
2257 | tmp -= tmpStride*(h+5-2);\ | ||
2258 | for(i=0; i<w; i++)\ | ||
2259 | {\ | ||
2260 | const int tmpB= tmp[-2*tmpStride];\ | ||
2261 | const int tmpA= tmp[-1*tmpStride];\ | ||
2262 | const int tmp0= tmp[0 *tmpStride];\ | ||
2263 | const int tmp1= tmp[1 *tmpStride];\ | ||
2264 | const int tmp2= tmp[2 *tmpStride];\ | ||
2265 | const int tmp3= tmp[3 *tmpStride];\ | ||
2266 | const int tmp4= tmp[4 *tmpStride];\ | ||
2267 | const int tmp5= tmp[5 *tmpStride];\ | ||
2268 | const int tmp6= tmp[6 *tmpStride];\ | ||
2269 | OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ | ||
2270 | OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ | ||
2271 | OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\ | ||
2272 | OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\ | ||
2273 | dst++;\ | ||
2274 | tmp++;\ | ||
2275 | }\ | ||
2276 | }\ | ||
2277 | \ | ||
2278 | static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | ||
2279 | const int h=8;\ | ||
2280 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ | ||
2281 | int i;\ | ||
2282 | for(i=0; i<h; i++)\ | ||
2283 | {\ | ||
2284 | OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\ | ||
2285 | OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\ | ||
2286 | OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\ | ||
2287 | OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\ | ||
2288 | OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\ | ||
2289 | OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\ | ||
2290 | OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\ | ||
2291 | OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\ | ||
2292 | dst+=dstStride;\ | ||
2293 | src+=srcStride;\ | ||
2294 | }\ | ||
2295 | }\ | ||
2296 | \ | ||
2297 | static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | ||
2298 | const int w=8;\ | ||
2299 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ | ||
2300 | int i;\ | ||
2301 | for(i=0; i<w; i++)\ | ||
2302 | {\ | ||
2303 | const int srcB= src[-2*srcStride];\ | ||
2304 | const int srcA= src[-1*srcStride];\ | ||
2305 | const int src0= src[0 *srcStride];\ | ||
2306 | const int src1= src[1 *srcStride];\ | ||
2307 | const int src2= src[2 *srcStride];\ | ||
2308 | const int src3= src[3 *srcStride];\ | ||
2309 | const int src4= src[4 *srcStride];\ | ||
2310 | const int src5= src[5 *srcStride];\ | ||
2311 | const int src6= src[6 *srcStride];\ | ||
2312 | const int src7= src[7 *srcStride];\ | ||
2313 | const int src8= src[8 *srcStride];\ | ||
2314 | const int src9= src[9 *srcStride];\ | ||
2315 | const int src10=src[10*srcStride];\ | ||
2316 | OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ | ||
2317 | OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ | ||
2318 | OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\ | ||
2319 | OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\ | ||
2320 | OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\ | ||
2321 | OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\ | ||
2322 | OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\ | ||
2323 | OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\ | ||
2324 | dst++;\ | ||
2325 | src++;\ | ||
2326 | }\ | ||
2327 | }\ | ||
2328 | \ | ||
2329 | static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ | ||
2330 | const int h=8;\ | ||
2331 | const int w=8;\ | ||
2332 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ | ||
2333 | int i;\ | ||
2334 | src -= 2*srcStride;\ | ||
2335 | for(i=0; i<h+5; i++)\ | ||
2336 | {\ | ||
2337 | tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\ | ||
2338 | tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\ | ||
2339 | tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\ | ||
2340 | tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\ | ||
2341 | tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\ | ||
2342 | tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\ | ||
2343 | tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\ | ||
2344 | tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\ | ||
2345 | tmp+=tmpStride;\ | ||
2346 | src+=srcStride;\ | ||
2347 | }\ | ||
2348 | tmp -= tmpStride*(h+5-2);\ | ||
2349 | for(i=0; i<w; i++)\ | ||
2350 | {\ | ||
2351 | const int tmpB= tmp[-2*tmpStride];\ | ||
2352 | const int tmpA= tmp[-1*tmpStride];\ | ||
2353 | const int tmp0= tmp[0 *tmpStride];\ | ||
2354 | const int tmp1= tmp[1 *tmpStride];\ | ||
2355 | const int tmp2= tmp[2 *tmpStride];\ | ||
2356 | const int tmp3= tmp[3 *tmpStride];\ | ||
2357 | const int tmp4= tmp[4 *tmpStride];\ | ||
2358 | const int tmp5= tmp[5 *tmpStride];\ | ||
2359 | const int tmp6= tmp[6 *tmpStride];\ | ||
2360 | const int tmp7= tmp[7 *tmpStride];\ | ||
2361 | const int tmp8= tmp[8 *tmpStride];\ | ||
2362 | const int tmp9= tmp[9 *tmpStride];\ | ||
2363 | const int tmp10=tmp[10*tmpStride];\ | ||
2364 | OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ | ||
2365 | OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ | ||
2366 | OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\ | ||
2367 | OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\ | ||
2368 | OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\ | ||
2369 | OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\ | ||
2370 | OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\ | ||
2371 | OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\ | ||
2372 | dst++;\ | ||
2373 | tmp++;\ | ||
2374 | }\ | ||
2375 | }\ | ||
2376 | \ | ||
2377 | static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | ||
2378 | OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\ | ||
2379 | OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\ | ||
2380 | src += 8*srcStride;\ | ||
2381 | dst += 8*dstStride;\ | ||
2382 | OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\ | ||
2383 | OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\ | ||
2384 | }\ | ||
2385 | \ | ||
2386 | static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | ||
2387 | OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\ | ||
2388 | OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\ | ||
2389 | src += 8*srcStride;\ | ||
2390 | dst += 8*dstStride;\ | ||
2391 | OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\ | ||
2392 | OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\ | ||
2393 | }\ | ||
2394 | \ | ||
2395 | static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ | ||
2396 | OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\ | ||
2397 | OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\ | ||
2398 | src += 8*srcStride;\ | ||
2399 | dst += 8*dstStride;\ | ||
2400 | OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\ | ||
2401 | OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\ | ||
2402 | }\ | ||
2403 | |||
/*
 * H264_MC(OPNAME, SIZE) — expands into the quarter-pel motion compensation
 * entry points OPNAME##h264_qpel##SIZE##_mcXY_c, where X/Y in 0..3 are the
 * horizontal/vertical quarter-sample offsets.
 *   - _mc00 is a plain copy (or average, for the avg_ flavour);
 *   - offsets of 2 apply the 6-tap lowpass helpers (h, v or hv) directly;
 *   - odd offsets average the adjacent integer/half-pel planes via the
 *     *_pixels*_l2 helpers.
 * The 'full' buffers hold a (SIZE+5)-row copy of the source so the vertical
 * 6-tap filter can read 2 rows above and 3 rows below the block; 'full_mid'
 * points at the block's own first row inside that copy.
 */
#define H264_MC(OPNAME, SIZE) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];  /* 2 extra rows above + 3 below for the 6-tap filter */\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];  /* intermediate (un-clipped) h-filtered rows */\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\

/* Output macros plugged into H264_LOWPASS:
 *   op_put / op_avg   — store (or average with the destination) the
 *                       single-pass 6-tap result, rounded/clipped via
 *                       cm[((x)+16)>>5];
 *   op2_put / op2_avg — same for the two-pass (hv) path, which carries an
 *                       extra factor of 32, hence cm[((x)+512)>>10].
 * 'cm' is the clip table in scope at the expansion site. */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)
#define op2_put(a, b) a = cm[((b) + 512)>>10]

/* Instantiate the "put" and "avg" flavours of the lowpass helpers and the
 * quarter-pel MC entry points for each block size (only put_ needs 2x2). */
H264_LOWPASS(put_ , op_put, op2_put)
H264_LOWPASS(avg_ , op_avg, op2_avg)
H264_MC(put_, 2)
H264_MC(put_, 4)
H264_MC(put_, 8)
H264_MC(put_, 16)
H264_MC(avg_, 4)
H264_MC(avg_, 8)
H264_MC(avg_, 16)

#undef op_avg
#undef op_put
#undef op2_avg
#undef op2_put
#endif /* NOTE(review): closes a conditional-compilation guard opened before this chunk */
2562 | |||
2563 | static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){ | ||
2564 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | ||
2565 | int i; | ||
2566 | |||
2567 | for(i=0; i<h; i++){ | ||
2568 | dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4]; | ||
2569 | dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4]; | ||
2570 | dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4]; | ||
2571 | dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4]; | ||
2572 | dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4]; | ||
2573 | dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4]; | ||
2574 | dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4]; | ||
2575 | dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4]; | ||
2576 | dst+=dstStride; | ||
2577 | src+=srcStride; | ||
2578 | } | ||
2579 | } | ||
2580 | |||
#if CONFIG_CAVS_DECODER
/* AVS specific */
/* Full-pel (mc00) motion compensation for CAVS: the integer-pel position
 * needs no interpolation, so these forward to the plain block copy/average
 * helpers for 8x8 and 16x16 blocks. */
void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    put_pixels8_c(dst, src, stride, 8);
}
void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    avg_pixels8_c(dst, src, stride, 8);
}
void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    put_pixels16_c(dst, src, stride, 16);
}
void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    avg_pixels16_c(dst, src, stride, 16);
}
#endif /* CONFIG_CAVS_DECODER */
2596 | |||
#if CONFIG_VC1_DECODER
/* VC-1 specific */
/* Full-pel mspel motion compensation: a plain 8x8 copy/average.  'rnd' is
 * part of the common mspel prototype but is unused here, since no rounding
 * happens at the integer-pel position. */
void ff_put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
    put_pixels8_c(dst, src, stride, 8);
}
void ff_avg_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
    avg_pixels8_c(dst, src, stride, 8);
}
#endif /* CONFIG_VC1_DECODER */
2606 | |||
#if CONFIG_RV40_DECODER
/* RV40's (3,3) sub-pel position is served by the xy2 (diagonal two-pixel
 * average) kernels rather than a dedicated filter. */
static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels16_xy2_c(dst, src, stride, 16);
}
static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels16_xy2_c(dst, src, stride, 16);
}
static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_xy2_c(dst, src, stride, 8);
}
static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels8_xy2_c(dst, src, stride, 8);
}
#endif /* CONFIG_RV40_DECODER */
2621 | |||
2622 | static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){ | ||
2623 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | ||
2624 | int i; | ||
2625 | |||
2626 | for(i=0; i<w; i++){ | ||
2627 | const int src_1= src[ -srcStride]; | ||
2628 | const int src0 = src[0 ]; | ||
2629 | const int src1 = src[ srcStride]; | ||
2630 | const int src2 = src[2*srcStride]; | ||
2631 | const int src3 = src[3*srcStride]; | ||
2632 | const int src4 = src[4*srcStride]; | ||
2633 | const int src5 = src[5*srcStride]; | ||
2634 | const int src6 = src[6*srcStride]; | ||
2635 | const int src7 = src[7*srcStride]; | ||
2636 | const int src8 = src[8*srcStride]; | ||
2637 | const int src9 = src[9*srcStride]; | ||
2638 | dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4]; | ||
2639 | dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4]; | ||
2640 | dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4]; | ||
2641 | dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4]; | ||
2642 | dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4]; | ||
2643 | dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4]; | ||
2644 | dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4]; | ||
2645 | dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4]; | ||
2646 | src++; | ||
2647 | dst++; | ||
2648 | } | ||
2649 | } | ||
2650 | |||
/* mspel offset (0,0): integer-pel position, plain 8x8 copy. */
static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_c(dst, src, stride, 8);
}
2654 | |||
/* mspel offset (1,0): average of the source block and the horizontal
 * half-pel plane. */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t hbuf[64];

    wmv2_mspel8_h_lowpass(hbuf, src, 8, stride, 8);
    put_pixels8_l2(dst, src, hbuf, stride, stride, 8, 8);
}
2660 | |||
/* mspel offset (2,0): horizontal half-pel plane written straight to dst. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
2664 | |||
/* mspel offset (3,0): average of the right-neighbour source block and the
 * horizontal half-pel plane. */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t hbuf[64];

    wmv2_mspel8_h_lowpass(hbuf, src, 8, stride, 8);
    put_pixels8_l2(dst, src + 1, hbuf, stride, stride, 8, 8);
}
2670 | |||
/* mspel offset (0,2): vertical half-pel plane written straight to dst. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
2674 | |||
/* mspel offset (1,2): average of the vertical half-pel plane and the
 * diagonal (h-then-v) half-pel plane. */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t hrows[88];   /* 11 h-filtered rows, starting one row above the block */
    uint8_t vplane[64];
    uint8_t hvplane[64];

    wmv2_mspel8_h_lowpass(hrows, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(vplane, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(hvplane, hrows + 8, 8, 8, 8);
    put_pixels8_l2(dst, vplane, hvplane, stride, 8, 8, 8);
}
/* mspel offset (3,2): like mc12, but the vertical plane is taken one pixel
 * to the right. */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t hrows[88];   /* 11 h-filtered rows, starting one row above the block */
    uint8_t vplane[64];
    uint8_t hvplane[64];

    wmv2_mspel8_h_lowpass(hrows, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(vplane, src + 1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(hvplane, hrows + 8, 8, 8, 8);
    put_pixels8_l2(dst, vplane, hvplane, stride, 8, 8, 8);
}
/* mspel offset (2,2): horizontal half-pel pass over 11 rows, then a
 * vertical pass directly into dst. */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t hrows[88];

    wmv2_mspel8_h_lowpass(hrows, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, hrows + 8, stride, 8, 8);
}
2698 | |||
/*
 * H.263 in-loop deblocking, vertical filtering of a horizontal block edge:
 * for each of 8 columns, adjusts the two pixel rows on either side of the
 * edge located just above 'src'.  Filter strength is looked up from the
 * quantiser via ff_h263_loop_filter_strength.
 */
static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        int x;
        const int strength= ff_h263_loop_filter_strength[qscale];

        for(x=0; x<8; x++){
            int d1, d2, ad1;
            int p0= src[x-2*stride];
            int p1= src[x-1*stride];
            int p2= src[x+0*stride];
            int p3= src[x+1*stride];
            /* weighted difference across the edge */
            int d = (p0 - p3 + 4*(p2 - p1)) / 8;

            /* dead-zone ramp: |d1| grows with |d| up to 'strength', then
             * decays to 0 so large d (a genuine image edge) is untouched */
            if (d<-2*strength) d1= 0;
            else if(d<- strength) d1=-2*strength - d;
            else if(d< strength) d1= d;
            else if(d< 2*strength) d1= 2*strength - d;
            else d1= 0;

            p1 += d1;
            p2 -= d1;
            /* branchless clip to 0..255 (assumes result in -256..511):
             * (p>>31) is 0 after overflow -> ~0 = 0xFF, and -1 after
             * underflow -> ~(-1) = 0x00 */
            if(p1&256) p1= ~(p1>>31);
            if(p2&256) p2= ~(p2>>31);

            src[x-1*stride] = p1;
            src[x+0*stride] = p2;

            ad1= FFABS(d1)>>1;

            /* secondary smoothing of the outer pixels, capped at |d1|/2 */
            d2= av_clip((p0-p3)/4, -ad1, ad1);

            src[x-2*stride] = p0 - d2;
            src[x+ stride] = p3 + d2;
        }
    }
}
2735 | |||
/*
 * H.263 in-loop deblocking, horizontal filtering of a vertical block edge:
 * for each of 8 rows, adjusts the two pixel columns on either side of the
 * edge located just left of 'src'.  Same filter as h263_v_loop_filter_c,
 * transposed.
 */
static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        int y;
        const int strength= ff_h263_loop_filter_strength[qscale];

        for(y=0; y<8; y++){
            int d1, d2, ad1;
            int p0= src[y*stride-2];
            int p1= src[y*stride-1];
            int p2= src[y*stride+0];
            int p3= src[y*stride+1];
            /* weighted difference across the edge */
            int d = (p0 - p3 + 4*(p2 - p1)) / 8;

            /* dead-zone ramp: |d1| grows with |d| up to 'strength', then
             * decays to 0 so large d (a genuine image edge) is untouched */
            if (d<-2*strength) d1= 0;
            else if(d<- strength) d1=-2*strength - d;
            else if(d< strength) d1= d;
            else if(d< 2*strength) d1= 2*strength - d;
            else d1= 0;

            p1 += d1;
            p2 -= d1;
            /* branchless clip to 0..255 (assumes result in -256..511):
             * (p>>31) is 0 after overflow -> 0xFF, -1 after underflow -> 0x00 */
            if(p1&256) p1= ~(p1>>31);
            if(p2&256) p2= ~(p2>>31);

            src[y*stride-1] = p1;
            src[y*stride+0] = p2;

            ad1= FFABS(d1)>>1;

            /* secondary smoothing of the outer pixels, capped at |d1|/2 */
            d2= av_clip((p0-p3)/4, -ad1, ad1);

            src[y*stride-2] = p0 - d2;
            src[y*stride+1] = p3 + d2;
        }
    }
}
2772 | |||
/*
 * H.261 in-loop filter: separable [1 2 1]/4 smoothing of an 8x8 block,
 * in place.  The top/bottom rows are passed through the vertical stage
 * unfiltered (held at 4x scale) and the left/right columns through the
 * horizontal stage, so block borders keep their original values at the
 * corners.
 */
static void h261_loop_filter_c(uint8_t *src, int stride){
    int blur[64];   /* vertically filtered block, scaled by 4 */
    int row, col, sp, bp;

    /* vertical [1 2 1] pass; border rows are copied at matching scale */
    for (col = 0; col < 8; col++) {
        blur[col]       = 4*src[col];
        blur[col + 7*8] = 4*src[col + 7*stride];
    }
    for (row = 1; row < 7; row++) {
        for (col = 0; col < 8; col++) {
            sp = row*stride + col;
            bp = row*8 + col;
            blur[bp] = src[sp - stride] + 2*src[sp] + src[sp + stride];
        }
    }

    /* horizontal [1 2 1] pass with rounding, written back to src */
    for (row = 0; row < 8; row++) {
        src[row*stride]     = (blur[row*8]     + 2) >> 2;
        src[row*stride + 7] = (blur[row*8 + 7] + 2) >> 2;
        for (col = 1; col < 7; col++) {
            sp = row*stride + col;
            bp = row*8 + col;
            src[sp] = (blur[bp-1] + 2*blur[bp] + blur[bp+1] + 8) >> 4;
        }
    }
}
2799 | |||
/*
 * Sum of absolute differences (SAD) over a 16-pixel-wide block of height h.
 * 'v' is an unused context pointer kept for the common comparison-function
 * signature; both blocks advance by line_size per row.
 */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, col;

    (void)v;   /* unused */
    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
2827 | |||
/*
 * SAD of pix1 against the horizontal half-pel interpolation of pix2
 * (average of each pixel and its right neighbour, via the file's avg2).
 * Reads one pixel beyond column 15 of pix2; 'v' is unused.
 */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, col;

    (void)v;   /* unused */
    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
2855 | |||
/*
 * SAD of pix1 against the vertical half-pel interpolation of pix2
 * (average of each pixel and the one directly below, via the file's avg2).
 * Reads one row beyond the last of pix2; 'v' is unused.
 */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sad = 0;
    int row, col;

    (void)v;   /* unused */
    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1 += line_size;
        pix2 += line_size;
        below += line_size;
    }
    return sad;
}
2885 | |||
/*
 * SAD of a 16xh block against a diagonally half-pel interpolated
 * reference: each reference sample is avg4() of the 2x2 neighbourhood.
 */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sum = 0, row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sum += abs(pix1[col] - avg4(pix2[col], pix2[col + 1],
                                        below[col], below[col + 1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
2915 | |||
/*
 * Plain SAD (sum of absolute differences) of an 8xh block.
 * line_size is the row stride of both buffers.
 */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0, row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sum += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
2935 | |||
/* SAD of an 8xh block vs. a horizontally half-pel interpolated reference. */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0, row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sum += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
2955 | |||
/* SAD of an 8xh block vs. a vertically half-pel interpolated reference. */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sum = 0, row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sum += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
2977 | |||
/* SAD of an 8xh block vs. a diagonally half-pel interpolated reference. */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sum = 0, row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sum += abs(pix1[col] - avg4(pix2[col], pix2[col + 1],
                                        below[col], below[col + 1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
2999 | |||
3000 | static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){ | ||
3001 | MpegEncContext *c = v; | ||
3002 | int score1=0; | ||
3003 | int score2=0; | ||
3004 | int x,y; | ||
3005 | |||
3006 | for(y=0; y<h; y++){ | ||
3007 | for(x=0; x<16; x++){ | ||
3008 | score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]); | ||
3009 | } | ||
3010 | if(y+1<h){ | ||
3011 | for(x=0; x<15; x++){ | ||
3012 | score2+= FFABS( s1[x ] - s1[x +stride] | ||
3013 | - s1[x+1] + s1[x+1+stride]) | ||
3014 | -FFABS( s2[x ] - s2[x +stride] | ||
3015 | - s2[x+1] + s2[x+1+stride]); | ||
3016 | } | ||
3017 | } | ||
3018 | s1+= stride; | ||
3019 | s2+= stride; | ||
3020 | } | ||
3021 | |||
3022 | if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight; | ||
3023 | else return score1 + FFABS(score2)*8; | ||
3024 | } | ||
3025 | |||
3026 | static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){ | ||
3027 | MpegEncContext *c = v; | ||
3028 | int score1=0; | ||
3029 | int score2=0; | ||
3030 | int x,y; | ||
3031 | |||
3032 | for(y=0; y<h; y++){ | ||
3033 | for(x=0; x<8; x++){ | ||
3034 | score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]); | ||
3035 | } | ||
3036 | if(y+1<h){ | ||
3037 | for(x=0; x<7; x++){ | ||
3038 | score2+= FFABS( s1[x ] - s1[x +stride] | ||
3039 | - s1[x+1] + s1[x+1+stride]) | ||
3040 | -FFABS( s2[x ] - s2[x +stride] | ||
3041 | - s2[x+1] + s2[x+1+stride]); | ||
3042 | } | ||
3043 | } | ||
3044 | s1+= stride; | ||
3045 | s2+= stride; | ||
3046 | } | ||
3047 | |||
3048 | if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight; | ||
3049 | else return score1 + FFABS(score2)*8; | ||
3050 | } | ||
3051 | |||
3052 | static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){ | ||
3053 | int i; | ||
3054 | unsigned int sum=0; | ||
3055 | |||
3056 | for(i=0; i<8*8; i++){ | ||
3057 | int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT)); | ||
3058 | int w= weight[i]; | ||
3059 | b>>= RECON_SHIFT; | ||
3060 | assert(-512<b && b<512); | ||
3061 | |||
3062 | sum += (w*b)*(w*b)>>4; | ||
3063 | } | ||
3064 | return sum>>2; | ||
3065 | } | ||
3066 | |||
3067 | static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){ | ||
3068 | int i; | ||
3069 | |||
3070 | for(i=0; i<8*8; i++){ | ||
3071 | rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT); | ||
3072 | } | ||
3073 | } | ||
3074 | |||
3075 | /** | ||
3076 | * permutes an 8x8 block. | ||
3077 | * @param block the block which will be permuted according to the given permutation vector | ||
3078 | * @param permutation the permutation vector | ||
3079 | * @param last the last non zero coefficient in scantable order, used to speed the permutation up | ||
3080 | * @param scantable the used scantable, this is only used to speed the permutation up, the block is not | ||
3081 | * (inverse) permutated to scantable order! | ||
3082 | */ | ||
3083 | void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last) | ||
3084 | { | ||
3085 | int i; | ||
3086 | DCTELEM temp[64]; | ||
3087 | |||
3088 | if(last<=0) return; | ||
3089 | //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations | ||
3090 | |||
3091 | for(i=0; i<=last; i++){ | ||
3092 | const int j= scantable[i]; | ||
3093 | temp[j]= block[j]; | ||
3094 | block[j]=0; | ||
3095 | } | ||
3096 | |||
3097 | for(i=0; i<=last; i++){ | ||
3098 | const int j= scantable[i]; | ||
3099 | const int perm_j= permutation[j]; | ||
3100 | block[perm_j]= temp[j]; | ||
3101 | } | ||
3102 | } | ||
3103 | |||
/* Dummy comparison function: always reports a score of zero. */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    (void)s; (void)a; (void)b; (void)stride; (void)h;
    return 0;
}
3107 | |||
/*
 * Fill cmp[0..5] with the comparison functions selected by type.
 * @param c    DSPContext whose per-metric function tables are consulted
 * @param cmp  output array of 6 me_cmp_func pointers
 * @param type FF_CMP_* selector; only the low 8 bits are examined
 * Unknown selectors leave the (zeroed) entries untouched and log an error.
 */
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
    int i;

    /* Clear all slots first so an unhandled type yields NULL pointers. */
    memset(cmp, 0, sizeof(void*)*6);

    for(i=0; i<6; i++){
        switch(type&0xFF){
        case FF_CMP_SAD:
            cmp[i]= c->sad[i];
            break;
        case FF_CMP_SATD:
            cmp[i]= c->hadamard8_diff[i];
            break;
        case FF_CMP_SSE:
            cmp[i]= c->sse[i];
            break;
        case FF_CMP_DCT:
            cmp[i]= c->dct_sad[i];
            break;
        case FF_CMP_DCT264:
            cmp[i]= c->dct264_sad[i];
            break;
        case FF_CMP_DCTMAX:
            cmp[i]= c->dct_max[i];
            break;
        case FF_CMP_PSNR:
            cmp[i]= c->quant_psnr[i];
            break;
        case FF_CMP_BIT:
            cmp[i]= c->bit[i];
            break;
        case FF_CMP_RD:
            cmp[i]= c->rd[i];
            break;
        case FF_CMP_VSAD:
            cmp[i]= c->vsad[i];
            break;
        case FF_CMP_VSSE:
            cmp[i]= c->vsse[i];
            break;
        case FF_CMP_ZERO:
            cmp[i]= zero_cmp;
            break;
        case FF_CMP_NSSE:
            cmp[i]= c->nsse[i];
            break;
#if CONFIG_DWT
        /* Wavelet metrics are only compiled in with the DWT support. */
        case FF_CMP_W53:
            cmp[i]= c->w53[i];
            break;
        case FF_CMP_W97:
            cmp[i]= c->w97[i];
            break;
#endif
        default:
            av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
        }
    }
}
3167 | |||
3168 | static void clear_block_c(DCTELEM *block) | ||
3169 | { | ||
3170 | memset(block, 0, sizeof(DCTELEM)*64); | ||
3171 | } | ||
3172 | |||
3173 | /** | ||
3174 | * memset(blocks, 0, sizeof(DCTELEM)*6*64) | ||
3175 | */ | ||
3176 | static void clear_blocks_c(DCTELEM *blocks) | ||
3177 | { | ||
3178 | memset(blocks, 0, sizeof(DCTELEM)*6*64); | ||
3179 | } | ||
3180 | |||
3181 | static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){ | ||
3182 | long i; | ||
3183 | for(i=0; i<=w-sizeof(long); i+=sizeof(long)){ | ||
3184 | long a = *(long*)(src+i); | ||
3185 | long b = *(long*)(dst+i); | ||
3186 | *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80); | ||
3187 | } | ||
3188 | for(; i<w; i++) | ||
3189 | dst[i+0] += src[i+0]; | ||
3190 | } | ||
3191 | |||
3192 | static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ | ||
3193 | long i; | ||
3194 | for(i=0; i<=w-sizeof(long); i+=sizeof(long)){ | ||
3195 | long a = *(long*)(src1+i); | ||
3196 | long b = *(long*)(src2+i); | ||
3197 | *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80); | ||
3198 | } | ||
3199 | for(; i<w; i++) | ||
3200 | dst[i] = src1[i]+src2[i]; | ||
3201 | } | ||
3202 | |||
3203 | static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ | ||
3204 | long i; | ||
3205 | #if !HAVE_FAST_UNALIGNED | ||
3206 | if((long)src2 & (sizeof(long)-1)){ | ||
3207 | for(i=0; i+7<w; i+=8){ | ||
3208 | dst[i+0] = src1[i+0]-src2[i+0]; | ||
3209 | dst[i+1] = src1[i+1]-src2[i+1]; | ||
3210 | dst[i+2] = src1[i+2]-src2[i+2]; | ||
3211 | dst[i+3] = src1[i+3]-src2[i+3]; | ||
3212 | dst[i+4] = src1[i+4]-src2[i+4]; | ||
3213 | dst[i+5] = src1[i+5]-src2[i+5]; | ||
3214 | dst[i+6] = src1[i+6]-src2[i+6]; | ||
3215 | dst[i+7] = src1[i+7]-src2[i+7]; | ||
3216 | } | ||
3217 | }else | ||
3218 | #endif | ||
3219 | for(i=0; i<=w-sizeof(long); i+=sizeof(long)){ | ||
3220 | long a = *(long*)(src1+i); | ||
3221 | long b = *(long*)(src2+i); | ||
3222 | *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80); | ||
3223 | } | ||
3224 | for(; i<w; i++) | ||
3225 | dst[i+0] = src1[i+0]-src2[i+0]; | ||
3226 | } | ||
3227 | |||
/*
 * HuffYUV median prediction, decode direction: reconstruct a row from the
 * residual diff[], using the left neighbour, the row above (src1) and the
 * top-left sample. *left / *left_top carry state between rows.
 */
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
    uint8_t cur      = *left;
    uint8_t top_left = *left_top;
    int i;

    for (i = 0; i < w; i++) {
        /* mid_pred of left, top and (left+top-topleft), then add residual;
         * the uint8_t assignment gives the modulo-256 wrap. */
        cur = mid_pred(cur, src1[i], (cur + src1[i] - top_left) & 0xFF) + diff[i];
        top_left = src1[i];
        dst[i] = cur;
    }

    *left     = cur;
    *left_top = top_left;
}
3244 | |||
/*
 * HuffYUV median prediction, encode direction: emit the residual of row
 * src2 against the median predictor built from src1 (row above) and the
 * running left/top-left state.
 */
static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
    uint8_t cur      = *left;
    uint8_t top_left = *left_top;
    int i;

    for (i = 0; i < w; i++) {
        const int pred = mid_pred(cur, src1[i], (cur + src1[i] - top_left) & 0xFF);
        top_left = src1[i];
        cur = src2[i];
        dst[i] = cur - pred;   /* uint8_t store wraps modulo 256 */
    }

    *left     = cur;
    *left_top = top_left;
}
3262 | |||
/*
 * Left prediction (running sum): dst[i] = acc + src[0..i], stored modulo
 * 256 via the uint8_t write. Returns the final accumulator so the caller
 * can chain rows. (The original's two-at-a-time unrolling is dropped;
 * the arithmetic is identical.)
 */
static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
    int i;

    for (i = 0; i < w; i++) {
        acc += src[i];
        dst[i] = acc;
    }

    return acc;
}
3281 | |||
/* Byte offsets of the four channels within a packed 32-bit BGRA pixel;
 * they depend on host endianness. */
#if HAVE_BIGENDIAN
#define B 3
#define G 2
#define R 1
#define A 0
#else
#define B 0
#define G 1
#define R 2
#define A 3
#endif
/*
 * Per-channel left prediction on packed 32-bit pixels: each of the four
 * channels accumulates its own running sum (modulo 256 via the uint8_t
 * store). The running values are carried in/out through the pointer
 * arguments so rows can be chained.
 */
static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
    int i;
    int r,g,b,a;
    r= *red;
    g= *green;
    b= *blue;
    a= *alpha;

    for(i=0; i<w; i++){
        b+= src[4*i+B];
        g+= src[4*i+G];
        r+= src[4*i+R];
        a+= src[4*i+A];

        dst[4*i+B]= b;
        dst[4*i+G]= g;
        dst[4*i+R]= r;
        dst[4*i+A]= a;
    }

    *red= r;
    *green= g;
    *blue= b;
    *alpha= a;
}
/* The channel-offset macros are local to this function. */
#undef B
#undef G
#undef R
#undef A
3322 | |||
/* Butterfly into two output lvalues: o1 = i1+i2, o2 = i1-i2. */
#define BUTTERFLY2(o1,o2,i1,i2) \
    o1= (i1)+(i2);\
    o2= (i1)-(i2);

/* In-place butterfly: x,y become x+y, x-y. */
#define BUTTERFLY1(x,y) \
{\
    int a,b;\
    a= x;\
    b= y;\
    x= a+b;\
    y= a-b;\
}

/* |x+y| + |x-y|: last butterfly stage folded into the absolute sum. */
#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
3337 | |||
/*
 * SATD of an 8x8 block: 2-D Hadamard transform of the pixel difference
 * (src - dst), then sum of absolute transform coefficients.
 * The butterfly sequence below is order-critical.
 */
static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    /* Horizontal pass: 8-point Hadamard on each row of differences. */
    for(i=0; i<8; i++){
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    /* Vertical pass on each column; the last stage is folded into
     * BUTTERFLYA, which accumulates absolute values directly. */
    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }
#if 0
/* Disabled debug code: track and print the maximum score seen. */
static int maxi=0;
if(sum>maxi){
    maxi=sum;
    printf("MAX:%d\n", maxi);
}
#endif
    return sum;
}
3389 | |||
/*
 * Intra variant of the 8x8 Hadamard SATD: transforms the pixels
 * themselves (no reference), then subtracts the DC-related term so the
 * block mean does not dominate the score.
 */
static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    /* Horizontal pass: 8-point Hadamard on each pixel row. */
    for(i=0; i<8; i++){
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    /* Vertical pass; BUTTERFLYA folds the last stage into the sum. */
    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    sum -= FFABS(temp[8*0] + temp[8*4]); // -mean

    return sum;
}
3437 | |||
3438 | static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ | ||
3439 | MpegEncContext * const s= (MpegEncContext *)c; | ||
3440 | LOCAL_ALIGNED_16(DCTELEM, temp, [64]); | ||
3441 | |||
3442 | assert(h==8); | ||
3443 | |||
3444 | s->dsp.diff_pixels(temp, src1, src2, stride); | ||
3445 | s->dsp.fdct(temp); | ||
3446 | return s->dsp.sum_abs_dctelem(temp); | ||
3447 | } | ||
3448 | |||
#if CONFIG_GPL
/*
 * One 8-point pass of the H.264-style integer DCT (butterflies plus
 * shift-adds, no multiplies). SRC/DST are macros supplied by the caller,
 * so the same text serves both the row and the column pass.
 */
#define DCT8_1D {\
    const int s07 = SRC(0) + SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    const int d07 = SRC(0) - SRC(7);\
    const int d16 = SRC(1) - SRC(6);\
    const int d25 = SRC(2) - SRC(5);\
    const int d34 = SRC(3) - SRC(4);\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(0,  a0 + a1     ) ;\
    DST(1,  a4 + (a7>>2)) ;\
    DST(2,  a2 + (a3>>1)) ;\
    DST(3,  a5 + (a6>>2)) ;\
    DST(4,  a0 - a1     ) ;\
    DST(5,  a6 - (a5>>2)) ;\
    DST(6, (a2>>1) - a3 ) ;\
    DST(7, (a4>>2) - a7 ) ;\
}

/*
 * SATD using the H.264 8x8 transform: rows are transformed in place,
 * then the column pass accumulates absolute values directly via the
 * redefined DST macro.
 */
static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DCTELEM dct[8][8];
    int i;
    int sum=0;

    s->dsp.diff_pixels(dct[0], src1, src2, stride);

/* Row pass: transform each row of the difference block in place. */
#define SRC(x) dct[i][x]
#define DST(x,v) dct[i][x]= v
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST

/* Column pass: DST discards the coefficient and sums its magnitude. */
#define SRC(x) dct[x][i]
#define DST(x,v) sum += FFABS(v)
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST
    return sum;
}
#endif
3501 | |||
3502 | static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ | ||
3503 | MpegEncContext * const s= (MpegEncContext *)c; | ||
3504 | LOCAL_ALIGNED_16(DCTELEM, temp, [64]); | ||
3505 | int sum=0, i; | ||
3506 | |||
3507 | assert(h==8); | ||
3508 | |||
3509 | s->dsp.diff_pixels(temp, src1, src2, stride); | ||
3510 | s->dsp.fdct(temp); | ||
3511 | |||
3512 | for(i=0; i<64; i++) | ||
3513 | sum= FFMAX(sum, FFABS(temp[i])); | ||
3514 | |||
3515 | return sum; | ||
3516 | } | ||
3517 | |||
/*
 * Quantization-noise metric: run the 8x8 pixel difference through the
 * encoder's quantize -> dequantize -> IDCT pipeline and return the SSE
 * between the reconstruction and the untouched difference block, i.e.
 * the distortion quantization itself would introduce at the current
 * qscale.
 */
static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
    DCTELEM * const bak = temp+64;   /* second half holds the pristine copy */
    int sum=0, i;

    assert(h==8);
    s->mb_intra=0;   /* force the inter quantizer path below */

    s->dsp.diff_pixels(temp, src1, src2, stride);

    /* Keep an unprocessed copy of the difference for the comparison. */
    memcpy(bak, temp, 64*sizeof(DCTELEM));

    s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
    s->dct_unquantize_inter(s, temp, 0, s->qscale);
    ff_simple_idct(temp); //FIXME

    for(i=0; i<64; i++)
        sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);

    return sum;
}
3540 | |||
/*
 * Rate-distortion metric for one 8x8 block: quantize the pixel
 * difference, estimate the VLC bit cost of the coefficients, reconstruct
 * via dequantize + idct_add, and return
 *     distortion + lambda * bits
 * where lambda is derived from qscale (the 109/128 factor appears to be
 * an empirical scaling — inherited from the original encoder; confirm
 * against mpegvideo_enc before changing).
 */
static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
    int i, last, run, bits, level, distortion, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    /* Work on local stride-8 copies so idct_add below can reconstruct
     * into lsrc2 without touching the caller's buffers. */
    copy_block8(lsrc1, src1, 8, stride, 8);
    copy_block8(lsrc2, src2, 8, stride, 8);

    s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);

    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);

    bits=0;

    /* Select the VLC length tables; intra blocks pay for the DC
     * coefficient separately. */
    if (s->mb_intra) {
        start_i = 1;
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        /* Walk the coefficients in scan order, costing each (run, level)
         * pair; levels outside [-64, 63] use the escape code length. */
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;   /* bias so the table index is non-negative */
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        /* The final coefficient uses the "last" table. */
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);   /* last coefficient must be nonzero */

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;

    }

    /* Reconstruct and measure the actual coding distortion. */
    if(last>=0){
        if(s->mb_intra)
            s->dct_unquantize_intra(s, temp, 0, s->qscale);
        else
            s->dct_unquantize_inter(s, temp, 0, s->qscale);
    }

    s->dsp.idct_add(lsrc2, 8, temp);

    distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);

    return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
}
3616 | |||
/*
 * Rate-only metric for one 8x8 block: quantize the pixel difference and
 * return the estimated VLC bit cost of coding the coefficients (same
 * costing scheme as rd8x8_c, without the reconstruction/distortion part).
 */
static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    int i, last, run, bits, level, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);

    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);

    bits=0;

    /* Intra blocks cost the DC coefficient from its own table. */
    if (s->mb_intra) {
        start_i = 1;
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        /* Cost each (run, level) pair in scan order; out-of-range levels
         * use the escape code length. */
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;   /* bias so the table index is non-negative */
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        /* Final coefficient uses the "last" table. */
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);   /* last coefficient must be nonzero */

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;
    }

    return bits;
}
3675 | |||
/*
 * Generator for vsad_intra8_c / vsad_intra16_c: vertical intra SAD, the
 * sum of |s[x] - s[x+stride]| over all pairs of vertically adjacent
 * pixels — a measure of vertical activity within a single block
 * (the second pixel argument is unused).
 */
#define VSAD_INTRA(size) \
static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0;                                                                                            \
    int x,y;                                                                                                \
                                                                                                            \
    for(y=1; y<h; y++){                                                                                     \
        for(x=0; x<size; x+=4){                                                                             \
            score+= FFABS(s[x  ] - s[x  +stride]) + FFABS(s[x+1] - s[x+1+stride])                           \
                   +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]);                          \
        }                                                                                                   \
        s+= stride;                                                                                         \
    }                                                                                                       \
                                                                                                            \
    return score;                                                                                           \
}
VSAD_INTRA(8)
VSAD_INTRA(16)
3693 | |||
/*
 * Vertical SAD between two 16-wide blocks: sums the absolute change of
 * the per-pixel difference from one row to the next (h-1 row pairs).
 */
static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int score = 0;
    int x, y;

    for (y = 1; y < h; y++) {
        for (x = 0; x < 16; x++) {
            const int d = s1[x] - s2[x] - s1[x + stride] + s2[x + stride];
            score += d >= 0 ? d : -d;
        }
        s1 += stride;
        s2 += stride;
    }

    return score;
}
3708 | |||
/* Square of a value; evaluates its argument twice. */
#define SQ(a) ((a)*(a))
/*
 * Generator for vsse_intra8_c / vsse_intra16_c: vertical intra SSE, the
 * sum of squared differences between vertically adjacent pixels within
 * one block (the second pixel argument is unused).
 */
#define VSSE_INTRA(size) \
static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0;                                                                                            \
    int x,y;                                                                                                \
                                                                                                            \
    for(y=1; y<h; y++){                                                                                     \
        for(x=0; x<size; x+=4){                                                                             \
            score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride])                                 \
                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);                                \
        }                                                                                                   \
        s+= stride;                                                                                         \
    }                                                                                                       \
                                                                                                            \
    return score;                                                                                           \
}
VSSE_INTRA(8)
VSSE_INTRA(16)
3727 | |||
/*
 * Vertical SSE between two 16-wide blocks: sums the squared change of
 * the per-pixel difference from one row to the next (h-1 row pairs).
 */
static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int score = 0;
    int x, y;

    for (y = 1; y < h; y++) {
        for (x = 0; x < 16; x++) {
            const int d = s1[x] - s2[x] - s1[x + stride] + s2[x + stride];
            score += d * d;
        }
        s1 += stride;
        s2 += stride;
    }

    return score;
}
3742 | |||
/* Sum of squared differences between an int8 and an int16 vector. */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size){
    int total = 0;
    int i;

    for (i = 0; i < size; i++) {
        const int d = pix1[i] - pix2[i];
        total += d * d;
    }
    return total;
}
3751 | |||
/* Build the 16x16 variants of the 8x8 metrics above by summing the 8x8
 * function over the four quadrants (WRAPPER8_16_SQ is defined earlier
 * in this file — confirm its exact tiling there). */
WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
#if CONFIG_GPL
WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
#endif
WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c)
3762 | |||
/* In-place elementwise multiply: dst[i] *= src[i] for i in [0, len). */
static void vector_fmul_c(float *dst, const float *src, int len){
    int n;
    for (n = 0; n < len; n++)
        dst[n] = dst[n] * src[n];
}
3768 | |||
/* dst[i] = src0[i] * src1[len-1-i]: multiply by src1 read backwards. */
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
    int n;
    for (n = 0; n < len; n++)
        dst[n] = src0[n] * src1[len - 1 - n];
}
3775 | |||
/* Fused multiply-add over vectors: dst[i] = src0[i]*src1[i] + src2[i]. */
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len){
    int n;
    for (n = 0; n < len; n++) {
        const float prod = src0[n] * src1[n];
        dst[n] = prod + src2[n];
    }
}
3781 | |||
/*
 * Overlap-add windowing over 2*len output samples:
 *   dst[k]          = src0[k]*win[2*len-1-k] - src1[len-1-k]*win[k]       + add_bias
 *   dst[2*len-1-k]  = src0[k]*win[k]         + src1[len-1-k]*win[2*len-1-k] + add_bias
 * for k in [0, len). Same arithmetic as the original negative-index
 * formulation, written with plain forward indexing.
 */
void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len){
    int k;
    for (k = 0; k < len; k++) {
        const int m  = 2 * len - 1 - k;
        const float s0 = src0[k];
        const float s1 = src1[len - 1 - k];
        const float w0 = win[k];
        const float w1 = win[m];
        dst[k] = s0 * w1 - s1 * w0 + add_bias;
        dst[m] = s0 * w0 + s1 * w1 + add_bias;
    }
}
3796 | |||
3797 | static void vector_fmul_scalar_c(float *dst, const float *src, float mul, | ||
3798 | int len) | ||
3799 | { | ||
3800 | int i; | ||
3801 | for (i = 0; i < len; i++) | ||
3802 | dst[i] = src[i] * mul; | ||
3803 | } | ||
3804 | |||
3805 | static void vector_fmul_sv_scalar_2_c(float *dst, const float *src, | ||
3806 | const float **sv, float mul, int len) | ||
3807 | { | ||
3808 | int i; | ||
3809 | for (i = 0; i < len; i += 2, sv++) { | ||
3810 | dst[i ] = src[i ] * sv[0][0] * mul; | ||
3811 | dst[i+1] = src[i+1] * sv[0][1] * mul; | ||
3812 | } | ||
3813 | } | ||
3814 | |||
3815 | static void vector_fmul_sv_scalar_4_c(float *dst, const float *src, | ||
3816 | const float **sv, float mul, int len) | ||
3817 | { | ||
3818 | int i; | ||
3819 | for (i = 0; i < len; i += 4, sv++) { | ||
3820 | dst[i ] = src[i ] * sv[0][0] * mul; | ||
3821 | dst[i+1] = src[i+1] * sv[0][1] * mul; | ||
3822 | dst[i+2] = src[i+2] * sv[0][2] * mul; | ||
3823 | dst[i+3] = src[i+3] * sv[0][3] * mul; | ||
3824 | } | ||
3825 | } | ||
3826 | |||
3827 | static void sv_fmul_scalar_2_c(float *dst, const float **sv, float mul, | ||
3828 | int len) | ||
3829 | { | ||
3830 | int i; | ||
3831 | for (i = 0; i < len; i += 2, sv++) { | ||
3832 | dst[i ] = sv[0][0] * mul; | ||
3833 | dst[i+1] = sv[0][1] * mul; | ||
3834 | } | ||
3835 | } | ||
3836 | |||
3837 | static void sv_fmul_scalar_4_c(float *dst, const float **sv, float mul, | ||
3838 | int len) | ||
3839 | { | ||
3840 | int i; | ||
3841 | for (i = 0; i < len; i += 4, sv++) { | ||
3842 | dst[i ] = sv[0][0] * mul; | ||
3843 | dst[i+1] = sv[0][1] * mul; | ||
3844 | dst[i+2] = sv[0][2] * mul; | ||
3845 | dst[i+3] = sv[0][3] * mul; | ||
3846 | } | ||
3847 | } | ||
3848 | |||
3849 | static void butterflies_float_c(float *restrict v1, float *restrict v2, | ||
3850 | int len) | ||
3851 | { | ||
3852 | int i; | ||
3853 | for (i = 0; i < len; i++) { | ||
3854 | float t = v1[i] - v2[i]; | ||
3855 | v1[i] += v2[i]; | ||
3856 | v2[i] = t; | ||
3857 | } | ||
3858 | } | ||
3859 | |||
3860 | static float scalarproduct_float_c(const float *v1, const float *v2, int len) | ||
3861 | { | ||
3862 | float p = 0.0; | ||
3863 | int i; | ||
3864 | |||
3865 | for (i = 0; i < len; i++) | ||
3866 | p += v1[i] * v2[i]; | ||
3867 | |||
3868 | return p; | ||
3869 | } | ||
3870 | |||
3871 | static void int32_to_float_fmul_scalar_c(float *dst, const int *src, float mul, int len){ | ||
3872 | int i; | ||
3873 | for(i=0; i<len; i++) | ||
3874 | dst[i] = src[i] * mul; | ||
3875 | } | ||
3876 | |||
3877 | static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini, | ||
3878 | uint32_t maxi, uint32_t maxisign) | ||
3879 | { | ||
3880 | |||
3881 | if(a > mini) return mini; | ||
3882 | else if((a^(1<<31)) > maxisign) return maxi; | ||
3883 | else return a; | ||
3884 | } | ||
3885 | |||
3886 | static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){ | ||
3887 | int i; | ||
3888 | uint32_t mini = *(uint32_t*)min; | ||
3889 | uint32_t maxi = *(uint32_t*)max; | ||
3890 | uint32_t maxisign = maxi ^ (1<<31); | ||
3891 | uint32_t *dsti = (uint32_t*)dst; | ||
3892 | const uint32_t *srci = (const uint32_t*)src; | ||
3893 | for(i=0; i<len; i+=8) { | ||
3894 | dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign); | ||
3895 | dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign); | ||
3896 | dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign); | ||
3897 | dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign); | ||
3898 | dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign); | ||
3899 | dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign); | ||
3900 | dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign); | ||
3901 | dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign); | ||
3902 | } | ||
3903 | } | ||
3904 | static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){ | ||
3905 | int i; | ||
3906 | if(min < 0 && max > 0) { | ||
3907 | vector_clipf_c_opposite_sign(dst, src, &min, &max, len); | ||
3908 | } else { | ||
3909 | for(i=0; i < len; i+=8) { | ||
3910 | dst[i ] = av_clipf(src[i ], min, max); | ||
3911 | dst[i + 1] = av_clipf(src[i + 1], min, max); | ||
3912 | dst[i + 2] = av_clipf(src[i + 2], min, max); | ||
3913 | dst[i + 3] = av_clipf(src[i + 3], min, max); | ||
3914 | dst[i + 4] = av_clipf(src[i + 4], min, max); | ||
3915 | dst[i + 5] = av_clipf(src[i + 5], min, max); | ||
3916 | dst[i + 6] = av_clipf(src[i + 6], min, max); | ||
3917 | dst[i + 7] = av_clipf(src[i + 7], min, max); | ||
3918 | } | ||
3919 | } | ||
3920 | } | ||
3921 | |||
3922 | static av_always_inline int float_to_int16_one(const float *src){ | ||
3923 | int_fast32_t tmp = *(const int32_t*)src; | ||
3924 | if(tmp & 0xf0000){ | ||
3925 | tmp = (0x43c0ffff - tmp)>>31; | ||
3926 | // is this faster on some gcc/cpu combinations? | ||
3927 | // if(tmp > 0x43c0ffff) tmp = 0xFFFF; | ||
3928 | // else tmp = 0; | ||
3929 | } | ||
3930 | return tmp - 0x8000; | ||
3931 | } | ||
3932 | |||
3933 | void ff_float_to_int16_c(int16_t *dst, const float *src, long len){ | ||
3934 | int i; | ||
3935 | for(i=0; i<len; i++) | ||
3936 | dst[i] = float_to_int16_one(src+i); | ||
3937 | } | ||
3938 | |||
3939 | void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, int channels){ | ||
3940 | int i,j,c; | ||
3941 | if(channels==2){ | ||
3942 | for(i=0; i<len; i++){ | ||
3943 | dst[2*i] = float_to_int16_one(src[0]+i); | ||
3944 | dst[2*i+1] = float_to_int16_one(src[1]+i); | ||
3945 | } | ||
3946 | }else{ | ||
3947 | for(c=0; c<channels; c++) | ||
3948 | for(i=0, j=c; i<len; i++, j+=channels) | ||
3949 | dst[j] = float_to_int16_one(src[c]+i); | ||
3950 | } | ||
3951 | } | ||
3952 | |||
3953 | static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order, int shift) | ||
3954 | { | ||
3955 | int res = 0; | ||
3956 | |||
3957 | while (order--) | ||
3958 | res += (*v1++ * *v2++) >> shift; | ||
3959 | |||
3960 | return res; | ||
3961 | } | ||
3962 | |||
3963 | static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul) | ||
3964 | { | ||
3965 | int res = 0; | ||
3966 | while (order--) { | ||
3967 | res += *v1 * *v2++; | ||
3968 | *v1++ += mul * *v3++; | ||
3969 | } | ||
3970 | return res; | ||
3971 | } | ||
3972 | |||
3973 | #define W0 2048 | ||
3974 | #define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */ | ||
3975 | #define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */ | ||
3976 | #define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */ | ||
3977 | #define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */ | ||
3978 | #define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */ | ||
3979 | #define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */ | ||
3980 | #define W7 565 /* 2048*sqrt (2)*cos (7*pi/16) */ | ||
3981 | |||
3982 | static void wmv2_idct_row(short * b) | ||
3983 | { | ||
3984 | int s1,s2; | ||
3985 | int a0,a1,a2,a3,a4,a5,a6,a7; | ||
3986 | /*step 1*/ | ||
3987 | a1 = W1*b[1]+W7*b[7]; | ||
3988 | a7 = W7*b[1]-W1*b[7]; | ||
3989 | a5 = W5*b[5]+W3*b[3]; | ||
3990 | a3 = W3*b[5]-W5*b[3]; | ||
3991 | a2 = W2*b[2]+W6*b[6]; | ||
3992 | a6 = W6*b[2]-W2*b[6]; | ||
3993 | a0 = W0*b[0]+W0*b[4]; | ||
3994 | a4 = W0*b[0]-W0*b[4]; | ||
3995 | /*step 2*/ | ||
3996 | s1 = (181*(a1-a5+a7-a3)+128)>>8;//1,3,5,7, | ||
3997 | s2 = (181*(a1-a5-a7+a3)+128)>>8; | ||
3998 | /*step 3*/ | ||
3999 | b[0] = (a0+a2+a1+a5 + (1<<7))>>8; | ||
4000 | b[1] = (a4+a6 +s1 + (1<<7))>>8; | ||
4001 | b[2] = (a4-a6 +s2 + (1<<7))>>8; | ||
4002 | b[3] = (a0-a2+a7+a3 + (1<<7))>>8; | ||
4003 | b[4] = (a0-a2-a7-a3 + (1<<7))>>8; | ||
4004 | b[5] = (a4-a6 -s2 + (1<<7))>>8; | ||
4005 | b[6] = (a4+a6 -s1 + (1<<7))>>8; | ||
4006 | b[7] = (a0+a2-a1-a5 + (1<<7))>>8; | ||
4007 | } | ||
4008 | static void wmv2_idct_col(short * b) | ||
4009 | { | ||
4010 | int s1,s2; | ||
4011 | int a0,a1,a2,a3,a4,a5,a6,a7; | ||
4012 | /*step 1, with extended precision*/ | ||
4013 | a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3; | ||
4014 | a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3; | ||
4015 | a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3; | ||
4016 | a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3; | ||
4017 | a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3; | ||
4018 | a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3; | ||
4019 | a0 = (W0*b[8*0]+W0*b[8*4] )>>3; | ||
4020 | a4 = (W0*b[8*0]-W0*b[8*4] )>>3; | ||
4021 | /*step 2*/ | ||
4022 | s1 = (181*(a1-a5+a7-a3)+128)>>8; | ||
4023 | s2 = (181*(a1-a5-a7+a3)+128)>>8; | ||
4024 | /*step 3*/ | ||
4025 | b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14; | ||
4026 | b[8*1] = (a4+a6 +s1 + (1<<13))>>14; | ||
4027 | b[8*2] = (a4-a6 +s2 + (1<<13))>>14; | ||
4028 | b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14; | ||
4029 | |||
4030 | b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14; | ||
4031 | b[8*5] = (a4-a6 -s2 + (1<<13))>>14; | ||
4032 | b[8*6] = (a4+a6 -s1 + (1<<13))>>14; | ||
4033 | b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14; | ||
4034 | } | ||
4035 | void ff_wmv2_idct_c(short * block){ | ||
4036 | int i; | ||
4037 | |||
4038 | for(i=0;i<64;i+=8){ | ||
4039 | wmv2_idct_row(block+i); | ||
4040 | } | ||
4041 | for(i=0;i<8;i++){ | ||
4042 | wmv2_idct_col(block+i); | ||
4043 | } | ||
4044 | } | ||
4045 | /* XXX: those functions should be suppressed ASAP when all IDCTs are | ||
4046 | converted */ | ||
4047 | static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block) | ||
4048 | { | ||
4049 | ff_wmv2_idct_c(block); | ||
4050 | put_pixels_clamped_c(block, dest, line_size); | ||
4051 | } | ||
4052 | static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block) | ||
4053 | { | ||
4054 | ff_wmv2_idct_c(block); | ||
4055 | add_pixels_clamped_c(block, dest, line_size); | ||
4056 | } | ||
4057 | static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block) | ||
4058 | { | ||
4059 | j_rev_dct (block); | ||
4060 | put_pixels_clamped_c(block, dest, line_size); | ||
4061 | } | ||
4062 | static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block) | ||
4063 | { | ||
4064 | j_rev_dct (block); | ||
4065 | add_pixels_clamped_c(block, dest, line_size); | ||
4066 | } | ||
4067 | |||
4068 | static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block) | ||
4069 | { | ||
4070 | j_rev_dct4 (block); | ||
4071 | put_pixels_clamped4_c(block, dest, line_size); | ||
4072 | } | ||
4073 | static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block) | ||
4074 | { | ||
4075 | j_rev_dct4 (block); | ||
4076 | add_pixels_clamped4_c(block, dest, line_size); | ||
4077 | } | ||
4078 | |||
4079 | static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block) | ||
4080 | { | ||
4081 | j_rev_dct2 (block); | ||
4082 | put_pixels_clamped2_c(block, dest, line_size); | ||
4083 | } | ||
4084 | static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block) | ||
4085 | { | ||
4086 | j_rev_dct2 (block); | ||
4087 | add_pixels_clamped2_c(block, dest, line_size); | ||
4088 | } | ||
4089 | |||
4090 | static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block) | ||
4091 | { | ||
4092 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | ||
4093 | |||
4094 | dest[0] = cm[(block[0] + 4)>>3]; | ||
4095 | } | ||
4096 | static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block) | ||
4097 | { | ||
4098 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | ||
4099 | |||
4100 | dest[0] = cm[dest[0] + ((block[0] + 4)>>3)]; | ||
4101 | } | ||
4102 | |||
4103 | static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; } | ||
4104 | |||
4105 | /* init static data */ | ||
4106 | av_cold void dsputil_static_init(void) | ||
4107 | { | ||
4108 | int i; | ||
4109 | |||
4110 | for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i; | ||
4111 | for(i=0;i<MAX_NEG_CROP;i++) { | ||
4112 | ff_cropTbl[i] = 0; | ||
4113 | ff_cropTbl[i + MAX_NEG_CROP + 256] = 255; | ||
4114 | } | ||
4115 | |||
4116 | for(i=0;i<512;i++) { | ||
4117 | ff_squareTbl[i] = (i - 256) * (i - 256); | ||
4118 | } | ||
4119 | |||
4120 | for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1; | ||
4121 | } | ||
4122 | |||
4123 | int ff_check_alignment(void){ | ||
4124 | static int did_fail=0; | ||
4125 | DECLARE_ALIGNED(16, int, aligned); | ||
4126 | |||
4127 | if((intptr_t)&aligned & 15){ | ||
4128 | if(!did_fail){ | ||
4129 | #if HAVE_MMX || HAVE_ALTIVEC | ||
4130 | av_log(NULL, AV_LOG_ERROR, | ||
4131 | "Compiler did not align stack variables. Libavcodec has been miscompiled\n" | ||
4132 | "and may be very slow or crash. This is not a bug in libavcodec,\n" | ||
4133 | "but in the compiler. You may try recompiling using gcc >= 4.2.\n" | ||
4134 | "Do not report crashes to FFmpeg developers.\n"); | ||
4135 | #endif | ||
4136 | did_fail=1; | ||
4137 | } | ||
4138 | return -1; | ||
4139 | } | ||
4140 | return 0; | ||
4141 | } | ||
4142 | |||
4143 | av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx) | ||
4144 | { | ||
4145 | int i; | ||
4146 | |||
4147 | ff_check_alignment(); | ||
4148 | |||
4149 | #if CONFIG_ENCODERS | ||
4150 | if(avctx->dct_algo==FF_DCT_FASTINT) { | ||
4151 | c->fdct = fdct_ifast; | ||
4152 | c->fdct248 = fdct_ifast248; | ||
4153 | } | ||
4154 | else if(avctx->dct_algo==FF_DCT_FAAN) { | ||
4155 | c->fdct = ff_faandct; | ||
4156 | c->fdct248 = ff_faandct248; | ||
4157 | } | ||
4158 | else { | ||
4159 | c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default | ||
4160 | c->fdct248 = ff_fdct248_islow; | ||
4161 | } | ||
4162 | #endif //CONFIG_ENCODERS | ||
4163 | |||
4164 | if(avctx->lowres==1){ | ||
4165 | if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO || !CONFIG_H264_DECODER){ | ||
4166 | c->idct_put= ff_jref_idct4_put; | ||
4167 | c->idct_add= ff_jref_idct4_add; | ||
4168 | }else{ | ||
4169 | c->idct_put= ff_h264_lowres_idct_put_c; | ||
4170 | c->idct_add= ff_h264_lowres_idct_add_c; | ||
4171 | } | ||
4172 | c->idct = j_rev_dct4; | ||
4173 | c->idct_permutation_type= FF_NO_IDCT_PERM; | ||
4174 | }else if(avctx->lowres==2){ | ||
4175 | c->idct_put= ff_jref_idct2_put; | ||
4176 | c->idct_add= ff_jref_idct2_add; | ||
4177 | c->idct = j_rev_dct2; | ||
4178 | c->idct_permutation_type= FF_NO_IDCT_PERM; | ||
4179 | }else if(avctx->lowres==3){ | ||
4180 | c->idct_put= ff_jref_idct1_put; | ||
4181 | c->idct_add= ff_jref_idct1_add; | ||
4182 | c->idct = j_rev_dct1; | ||
4183 | c->idct_permutation_type= FF_NO_IDCT_PERM; | ||
4184 | }else{ | ||
4185 | if(avctx->idct_algo==FF_IDCT_INT){ | ||
4186 | c->idct_put= ff_jref_idct_put; | ||
4187 | c->idct_add= ff_jref_idct_add; | ||
4188 | c->idct = j_rev_dct; | ||
4189 | c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; | ||
4190 | }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) && | ||
4191 | avctx->idct_algo==FF_IDCT_VP3){ | ||
4192 | c->idct_put= ff_vp3_idct_put_c; | ||
4193 | c->idct_add= ff_vp3_idct_add_c; | ||
4194 | c->idct = ff_vp3_idct_c; | ||
4195 | c->idct_permutation_type= FF_NO_IDCT_PERM; | ||
4196 | }else if(avctx->idct_algo==FF_IDCT_WMV2){ | ||
4197 | c->idct_put= ff_wmv2_idct_put_c; | ||
4198 | c->idct_add= ff_wmv2_idct_add_c; | ||
4199 | c->idct = ff_wmv2_idct_c; | ||
4200 | c->idct_permutation_type= FF_NO_IDCT_PERM; | ||
4201 | }else if(avctx->idct_algo==FF_IDCT_FAAN){ | ||
4202 | c->idct_put= ff_faanidct_put; | ||
4203 | c->idct_add= ff_faanidct_add; | ||
4204 | c->idct = ff_faanidct; | ||
4205 | c->idct_permutation_type= FF_NO_IDCT_PERM; | ||
4206 | }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) { | ||
4207 | c->idct_put= ff_ea_idct_put_c; | ||
4208 | c->idct_permutation_type= FF_NO_IDCT_PERM; | ||
4209 | }else if(CONFIG_BINK_DECODER && avctx->idct_algo==FF_IDCT_BINK) { | ||
4210 | c->idct = ff_bink_idct_c; | ||
4211 | c->idct_add = ff_bink_idct_add_c; | ||
4212 | c->idct_put = ff_bink_idct_put_c; | ||
4213 | c->idct_permutation_type = FF_NO_IDCT_PERM; | ||
4214 | }else{ //accurate/default | ||
4215 | c->idct_put= ff_simple_idct_put; | ||
4216 | c->idct_add= ff_simple_idct_add; | ||
4217 | c->idct = ff_simple_idct; | ||
4218 | c->idct_permutation_type= FF_NO_IDCT_PERM; | ||
4219 | } | ||
4220 | } | ||
4221 | |||
4222 | c->get_pixels = get_pixels_c; | ||
4223 | c->diff_pixels = diff_pixels_c; | ||
4224 | c->put_pixels_clamped = put_pixels_clamped_c; | ||
4225 | c->put_signed_pixels_clamped = put_signed_pixels_clamped_c; | ||
4226 | c->put_pixels_nonclamped = put_pixels_nonclamped_c; | ||
4227 | c->add_pixels_clamped = add_pixels_clamped_c; | ||
4228 | c->add_pixels8 = add_pixels8_c; | ||
4229 | c->add_pixels4 = add_pixels4_c; | ||
4230 | c->sum_abs_dctelem = sum_abs_dctelem_c; | ||
4231 | c->gmc1 = gmc1_c; | ||
4232 | c->gmc = ff_gmc_c; | ||
4233 | c->clear_block = clear_block_c; | ||
4234 | c->clear_blocks = clear_blocks_c; | ||
4235 | c->pix_sum = pix_sum_c; | ||
4236 | c->pix_norm1 = pix_norm1_c; | ||
4237 | |||
4238 | c->fill_block_tab[0] = fill_block16_c; | ||
4239 | c->fill_block_tab[1] = fill_block8_c; | ||
4240 | c->scale_block = scale_block_c; | ||
4241 | |||
4242 | /* TODO [0] 16 [1] 8 */ | ||
4243 | c->pix_abs[0][0] = pix_abs16_c; | ||
4244 | c->pix_abs[0][1] = pix_abs16_x2_c; | ||
4245 | c->pix_abs[0][2] = pix_abs16_y2_c; | ||
4246 | c->pix_abs[0][3] = pix_abs16_xy2_c; | ||
4247 | c->pix_abs[1][0] = pix_abs8_c; | ||
4248 | c->pix_abs[1][1] = pix_abs8_x2_c; | ||
4249 | c->pix_abs[1][2] = pix_abs8_y2_c; | ||
4250 | c->pix_abs[1][3] = pix_abs8_xy2_c; | ||
4251 | |||
4252 | #define dspfunc(PFX, IDX, NUM) \ | ||
4253 | c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \ | ||
4254 | c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \ | ||
4255 | c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \ | ||
4256 | c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c | ||
4257 | |||
4258 | dspfunc(put, 0, 16); | ||
4259 | dspfunc(put_no_rnd, 0, 16); | ||
4260 | dspfunc(put, 1, 8); | ||
4261 | dspfunc(put_no_rnd, 1, 8); | ||
4262 | dspfunc(put, 2, 4); | ||
4263 | dspfunc(put, 3, 2); | ||
4264 | |||
4265 | dspfunc(avg, 0, 16); | ||
4266 | dspfunc(avg_no_rnd, 0, 16); | ||
4267 | dspfunc(avg, 1, 8); | ||
4268 | dspfunc(avg_no_rnd, 1, 8); | ||
4269 | dspfunc(avg, 2, 4); | ||
4270 | dspfunc(avg, 3, 2); | ||
4271 | #undef dspfunc | ||
4272 | |||
4273 | c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c; | ||
4274 | c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c; | ||
4275 | |||
4276 | c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c; | ||
4277 | c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c; | ||
4278 | c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c; | ||
4279 | c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c; | ||
4280 | c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c; | ||
4281 | c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c; | ||
4282 | c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c; | ||
4283 | c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c; | ||
4284 | c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c; | ||
4285 | |||
4286 | c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c; | ||
4287 | c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c; | ||
4288 | c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c; | ||
4289 | c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c; | ||
4290 | c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c; | ||
4291 | c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c; | ||
4292 | c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c; | ||
4293 | c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c; | ||
4294 | c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c; | ||
4295 | |||
4296 | #define dspfunc(PFX, IDX, NUM) \ | ||
4297 | c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \ | ||
4298 | c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \ | ||
4299 | c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \ | ||
4300 | c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \ | ||
4301 | c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \ | ||
4302 | c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \ | ||
4303 | c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \ | ||
4304 | c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \ | ||
4305 | c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \ | ||
4306 | c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \ | ||
4307 | c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \ | ||
4308 | c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \ | ||
4309 | c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \ | ||
4310 | c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \ | ||
4311 | c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \ | ||
4312 | c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c | ||
4313 | |||
4314 | dspfunc(put_qpel, 0, 16); | ||
4315 | dspfunc(put_no_rnd_qpel, 0, 16); | ||
4316 | |||
4317 | dspfunc(avg_qpel, 0, 16); | ||
4318 | /* dspfunc(avg_no_rnd_qpel, 0, 16); */ | ||
4319 | |||
4320 | dspfunc(put_qpel, 1, 8); | ||
4321 | dspfunc(put_no_rnd_qpel, 1, 8); | ||
4322 | |||
4323 | dspfunc(avg_qpel, 1, 8); | ||
4324 | /* dspfunc(avg_no_rnd_qpel, 1, 8); */ | ||
4325 | |||
4326 | dspfunc(put_h264_qpel, 0, 16); | ||
4327 | dspfunc(put_h264_qpel, 1, 8); | ||
4328 | dspfunc(put_h264_qpel, 2, 4); | ||
4329 | dspfunc(put_h264_qpel, 3, 2); | ||
4330 | dspfunc(avg_h264_qpel, 0, 16); | ||
4331 | dspfunc(avg_h264_qpel, 1, 8); | ||
4332 | dspfunc(avg_h264_qpel, 2, 4); | ||
4333 | |||
4334 | #undef dspfunc | ||
4335 | c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c; | ||
4336 | c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c; | ||
4337 | c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c; | ||
4338 | c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c; | ||
4339 | c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c; | ||
4340 | c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c; | ||
4341 | c->put_no_rnd_vc1_chroma_pixels_tab[0]= put_no_rnd_vc1_chroma_mc8_c; | ||
4342 | c->avg_no_rnd_vc1_chroma_pixels_tab[0]= avg_no_rnd_vc1_chroma_mc8_c; | ||
4343 | |||
4344 | c->draw_edges = draw_edges_c; | ||
4345 | |||
4346 | #if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER | ||
4347 | ff_mlp_init(c, avctx); | ||
4348 | #endif | ||
4349 | #if CONFIG_VC1_DECODER | ||
4350 | ff_vc1dsp_init(c,avctx); | ||
4351 | #endif | ||
4352 | #if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER | ||
4353 | ff_intrax8dsp_init(c,avctx); | ||
4354 | #endif | ||
4355 | #if CONFIG_RV30_DECODER | ||
4356 | ff_rv30dsp_init(c,avctx); | ||
4357 | #endif | ||
4358 | #if CONFIG_RV40_DECODER | ||
4359 | ff_rv40dsp_init(c,avctx); | ||
4360 | c->put_rv40_qpel_pixels_tab[0][15] = put_rv40_qpel16_mc33_c; | ||
4361 | c->avg_rv40_qpel_pixels_tab[0][15] = avg_rv40_qpel16_mc33_c; | ||
4362 | c->put_rv40_qpel_pixels_tab[1][15] = put_rv40_qpel8_mc33_c; | ||
4363 | c->avg_rv40_qpel_pixels_tab[1][15] = avg_rv40_qpel8_mc33_c; | ||
4364 | #endif | ||
4365 | |||
4366 | c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c; | ||
4367 | c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c; | ||
4368 | c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c; | ||
4369 | c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c; | ||
4370 | c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c; | ||
4371 | c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c; | ||
4372 | c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c; | ||
4373 | c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c; | ||
4374 | |||
4375 | #define SET_CMP_FUNC(name) \ | ||
4376 | c->name[0]= name ## 16_c;\ | ||
4377 | c->name[1]= name ## 8x8_c; | ||
4378 | |||
4379 | SET_CMP_FUNC(hadamard8_diff) | ||
4380 | c->hadamard8_diff[4]= hadamard8_intra16_c; | ||
4381 | c->hadamard8_diff[5]= hadamard8_intra8x8_c; | ||
4382 | SET_CMP_FUNC(dct_sad) | ||
4383 | SET_CMP_FUNC(dct_max) | ||
4384 | #if CONFIG_GPL | ||
4385 | SET_CMP_FUNC(dct264_sad) | ||
4386 | #endif | ||
4387 | c->sad[0]= pix_abs16_c; | ||
4388 | c->sad[1]= pix_abs8_c; | ||
4389 | c->sse[0]= sse16_c; | ||
4390 | c->sse[1]= sse8_c; | ||
4391 | c->sse[2]= sse4_c; | ||
4392 | SET_CMP_FUNC(quant_psnr) | ||
4393 | SET_CMP_FUNC(rd) | ||
4394 | SET_CMP_FUNC(bit) | ||
4395 | c->vsad[0]= vsad16_c; | ||
4396 | c->vsad[4]= vsad_intra16_c; | ||
4397 | c->vsad[5]= vsad_intra8_c; | ||
4398 | c->vsse[0]= vsse16_c; | ||
4399 | c->vsse[4]= vsse_intra16_c; | ||
4400 | c->vsse[5]= vsse_intra8_c; | ||
4401 | c->nsse[0]= nsse16_c; | ||
4402 | c->nsse[1]= nsse8_c; | ||
4403 | #if CONFIG_DWT | ||
4404 | ff_dsputil_init_dwt(c); | ||
4405 | #endif | ||
4406 | |||
4407 | c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c; | ||
4408 | |||
4409 | c->add_bytes= add_bytes_c; | ||
4410 | c->add_bytes_l2= add_bytes_l2_c; | ||
4411 | c->diff_bytes= diff_bytes_c; | ||
4412 | c->add_hfyu_median_prediction= add_hfyu_median_prediction_c; | ||
4413 | c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c; | ||
4414 | c->add_hfyu_left_prediction = add_hfyu_left_prediction_c; | ||
4415 | c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c; | ||
4416 | c->bswap_buf= bswap_buf; | ||
4417 | #if CONFIG_PNG_DECODER | ||
4418 | c->add_png_paeth_prediction= ff_add_png_paeth_prediction; | ||
4419 | #endif | ||
4420 | |||
4421 | if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) { | ||
4422 | c->h263_h_loop_filter= h263_h_loop_filter_c; | ||
4423 | c->h263_v_loop_filter= h263_v_loop_filter_c; | ||
4424 | } | ||
4425 | |||
4426 | if (CONFIG_VP3_DECODER) { | ||
4427 | c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c; | ||
4428 | c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c; | ||
4429 | c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c; | ||
4430 | } | ||
4431 | if (CONFIG_VP6_DECODER) { | ||
4432 | c->vp6_filter_diag4= ff_vp6_filter_diag4_c; | ||
4433 | } | ||
4434 | |||
4435 | c->h261_loop_filter= h261_loop_filter_c; | ||
4436 | |||
4437 | c->try_8x8basis= try_8x8basis_c; | ||
4438 | c->add_8x8basis= add_8x8basis_c; | ||
4439 | |||
4440 | #if CONFIG_VORBIS_DECODER | ||
4441 | c->vorbis_inverse_coupling = vorbis_inverse_coupling; | ||
4442 | #endif | ||
4443 | #if CONFIG_AC3_DECODER | ||
4444 | c->ac3_downmix = ff_ac3_downmix_c; | ||
4445 | #endif | ||
4446 | #if CONFIG_LPC | ||
4447 | c->lpc_compute_autocorr = ff_lpc_compute_autocorr; | ||
4448 | #endif | ||
4449 | c->vector_fmul = vector_fmul_c; | ||
4450 | c->vector_fmul_reverse = vector_fmul_reverse_c; | ||
4451 | c->vector_fmul_add = vector_fmul_add_c; | ||
4452 | c->vector_fmul_window = ff_vector_fmul_window_c; | ||
4453 | c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c; | ||
4454 | c->vector_clipf = vector_clipf_c; | ||
4455 | c->float_to_int16 = ff_float_to_int16_c; | ||
4456 | c->float_to_int16_interleave = ff_float_to_int16_interleave_c; | ||
4457 | c->scalarproduct_int16 = scalarproduct_int16_c; | ||
4458 | c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c; | ||
4459 | c->scalarproduct_float = scalarproduct_float_c; | ||
4460 | c->butterflies_float = butterflies_float_c; | ||
4461 | c->vector_fmul_scalar = vector_fmul_scalar_c; | ||
4462 | |||
4463 | c->vector_fmul_sv_scalar[0] = vector_fmul_sv_scalar_2_c; | ||
4464 | c->vector_fmul_sv_scalar[1] = vector_fmul_sv_scalar_4_c; | ||
4465 | |||
4466 | c->sv_fmul_scalar[0] = sv_fmul_scalar_2_c; | ||
4467 | c->sv_fmul_scalar[1] = sv_fmul_scalar_4_c; | ||
4468 | |||
4469 | c->shrink[0]= ff_img_copy_plane; | ||
4470 | c->shrink[1]= ff_shrink22; | ||
4471 | c->shrink[2]= ff_shrink44; | ||
4472 | c->shrink[3]= ff_shrink88; | ||
4473 | |||
4474 | c->prefetch= just_return; | ||
4475 | |||
4476 | memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab)); | ||
4477 | memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab)); | ||
4478 | |||
4479 | if (HAVE_MMX) dsputil_init_mmx (c, avctx); | ||
4480 | if (ARCH_ARM) dsputil_init_arm (c, avctx); | ||
4481 | if (CONFIG_MLIB) dsputil_init_mlib (c, avctx); | ||
4482 | if (HAVE_VIS) dsputil_init_vis (c, avctx); | ||
4483 | if (ARCH_ALPHA) dsputil_init_alpha (c, avctx); | ||
4484 | if (ARCH_PPC) dsputil_init_ppc (c, avctx); | ||
4485 | if (HAVE_MMI) dsputil_init_mmi (c, avctx); | ||
4486 | if (ARCH_SH4) dsputil_init_sh4 (c, avctx); | ||
4487 | if (ARCH_BFIN) dsputil_init_bfin (c, avctx); | ||
4488 | |||
4489 | for(i=0; i<64; i++){ | ||
4490 | if(!c->put_2tap_qpel_pixels_tab[0][i]) | ||
4491 | c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i]; | ||
4492 | if(!c->avg_2tap_qpel_pixels_tab[0][i]) | ||
4493 | c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i]; | ||
4494 | } | ||
4495 | |||
4496 | c->put_rv30_tpel_pixels_tab[0][0] = c->put_h264_qpel_pixels_tab[0][0]; | ||
4497 | c->put_rv30_tpel_pixels_tab[1][0] = c->put_h264_qpel_pixels_tab[1][0]; | ||
4498 | c->avg_rv30_tpel_pixels_tab[0][0] = c->avg_h264_qpel_pixels_tab[0][0]; | ||
4499 | c->avg_rv30_tpel_pixels_tab[1][0] = c->avg_h264_qpel_pixels_tab[1][0]; | ||
4500 | |||
4501 | c->put_rv40_qpel_pixels_tab[0][0] = c->put_h264_qpel_pixels_tab[0][0]; | ||
4502 | c->put_rv40_qpel_pixels_tab[1][0] = c->put_h264_qpel_pixels_tab[1][0]; | ||
4503 | c->avg_rv40_qpel_pixels_tab[0][0] = c->avg_h264_qpel_pixels_tab[0][0]; | ||
4504 | c->avg_rv40_qpel_pixels_tab[1][0] = c->avg_h264_qpel_pixels_tab[1][0]; | ||
4505 | |||
4506 | switch(c->idct_permutation_type){ | ||
4507 | case FF_NO_IDCT_PERM: | ||
4508 | for(i=0; i<64; i++) | ||
4509 | c->idct_permutation[i]= i; | ||
4510 | break; | ||
4511 | case FF_LIBMPEG2_IDCT_PERM: | ||
4512 | for(i=0; i<64; i++) | ||
4513 | c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); | ||
4514 | break; | ||
4515 | case FF_SIMPLE_IDCT_PERM: | ||
4516 | for(i=0; i<64; i++) | ||
4517 | c->idct_permutation[i]= simple_mmx_permutation[i]; | ||
4518 | break; | ||
4519 | case FF_TRANSPOSE_IDCT_PERM: | ||
4520 | for(i=0; i<64; i++) | ||
4521 | c->idct_permutation[i]= ((i&7)<<3) | (i>>3); | ||
4522 | break; | ||
4523 | case FF_PARTTRANS_IDCT_PERM: | ||
4524 | for(i=0; i<64; i++) | ||
4525 | c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3); | ||
4526 | break; | ||
4527 | case FF_SSE2_IDCT_PERM: | ||
4528 | for(i=0; i<64; i++) | ||
4529 | c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7]; | ||
4530 | break; | ||
4531 | default: | ||
4532 | av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n"); | ||
4533 | } | ||
4534 | } | ||
4535 | |||
diff --git a/apps/codecs/libwmavoice/dsputil.h b/apps/codecs/libwmavoice/dsputil.h deleted file mode 100644 index 9ef0270ade..0000000000 --- a/apps/codecs/libwmavoice/dsputil.h +++ /dev/null | |||
@@ -1,800 +0,0 @@ | |||
1 | /* | ||
2 | * DSP utils | ||
3 | * Copyright (c) 2000, 2001, 2002 Fabrice Bellard | ||
4 | * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | ||
5 | * | ||
6 | * This file is part of FFmpeg. | ||
7 | * | ||
8 | * FFmpeg is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU Lesser General Public | ||
10 | * License as published by the Free Software Foundation; either | ||
11 | * version 2.1 of the License, or (at your option) any later version. | ||
12 | * | ||
13 | * FFmpeg is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
16 | * Lesser General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU Lesser General Public | ||
19 | * License along with FFmpeg; if not, write to the Free Software | ||
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
21 | */ | ||
22 | |||
23 | /** | ||
24 | * @file | ||
25 | * DSP utils. | ||
26 | * note, many functions in here may use MMX which trashes the FPU state, it is | ||
27 | * absolutely necessary to call emms_c() between dsp & float/double code | ||
28 | */ | ||
29 | |||
30 | #ifndef AVCODEC_DSPUTIL_H | ||
31 | #define AVCODEC_DSPUTIL_H | ||
32 | |||
33 | #include "libavutil/intreadwrite.h" | ||
34 | #include "avcodec.h" | ||
35 | |||
36 | |||
37 | //#define DEBUG | ||
38 | /* dct code */ | ||
39 | typedef short DCTELEM; | ||
40 | |||
41 | void fdct_ifast (DCTELEM *data); | ||
42 | void fdct_ifast248 (DCTELEM *data); | ||
43 | void ff_jpeg_fdct_islow (DCTELEM *data); | ||
44 | void ff_fdct248_islow (DCTELEM *data); | ||
45 | |||
46 | void j_rev_dct (DCTELEM *data); | ||
47 | void j_rev_dct4 (DCTELEM *data); | ||
48 | void j_rev_dct2 (DCTELEM *data); | ||
49 | void j_rev_dct1 (DCTELEM *data); | ||
50 | void ff_wmv2_idct_c(DCTELEM *data); | ||
51 | |||
52 | void ff_fdct_mmx(DCTELEM *block); | ||
53 | void ff_fdct_mmx2(DCTELEM *block); | ||
54 | void ff_fdct_sse2(DCTELEM *block); | ||
55 | |||
56 | void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride); | ||
57 | void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride); | ||
58 | void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride); | ||
59 | void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride); | ||
60 | void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block); | ||
61 | void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block); | ||
62 | void ff_h264_idct_add16_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]); | ||
63 | void ff_h264_idct_add16intra_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]); | ||
64 | void ff_h264_idct8_add4_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]); | ||
65 | void ff_h264_idct_add8_c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]); | ||
66 | |||
67 | void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, | ||
68 | const float *win, float add_bias, int len); | ||
69 | void ff_float_to_int16_c(int16_t *dst, const float *src, long len); | ||
70 | void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, int channels); | ||
71 | |||
72 | /* encoding scans */ | ||
73 | extern const uint8_t ff_alternate_horizontal_scan[64]; | ||
74 | extern const uint8_t ff_alternate_vertical_scan[64]; | ||
75 | extern const uint8_t ff_zigzag_direct[64]; | ||
76 | extern const uint8_t ff_zigzag248_direct[64]; | ||
77 | |||
78 | /* pixel operations */ | ||
79 | #define MAX_NEG_CROP 1024 | ||
80 | |||
81 | /* temporary */ | ||
82 | extern uint32_t ff_squareTbl[512]; | ||
83 | extern uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP]; | ||
84 | |||
85 | /* VP3 DSP functions */ | ||
86 | void ff_vp3_idct_c(DCTELEM *block/* align 16*/); | ||
87 | void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); | ||
88 | void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); | ||
89 | void ff_vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size, const DCTELEM *block/*align 16*/); | ||
90 | |||
91 | void ff_vp3_v_loop_filter_c(uint8_t *src, int stride, int *bounding_values); | ||
92 | void ff_vp3_h_loop_filter_c(uint8_t *src, int stride, int *bounding_values); | ||
93 | |||
94 | /* VP6 DSP functions */ | ||
95 | void ff_vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, int stride, | ||
96 | const int16_t *h_weights, const int16_t *v_weights); | ||
97 | |||
98 | /* Bink functions */ | ||
99 | void ff_bink_idct_c (DCTELEM *block); | ||
100 | void ff_bink_idct_add_c(uint8_t *dest, int linesize, DCTELEM *block); | ||
101 | void ff_bink_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block); | ||
102 | |||
103 | /* CAVS functions */ | ||
104 | void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride); | ||
105 | void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride); | ||
106 | void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride); | ||
107 | void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride); | ||
108 | |||
109 | /* VC1 functions */ | ||
110 | void ff_put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd); | ||
111 | void ff_avg_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd); | ||
112 | |||
113 | /* EA functions */ | ||
114 | void ff_ea_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block); | ||
115 | |||
116 | /* 1/2^n downscaling functions from imgconvert.c */ | ||
117 | void ff_img_copy_plane(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); | ||
118 | void ff_shrink22(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); | ||
119 | void ff_shrink44(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); | ||
120 | void ff_shrink88(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); | ||
121 | |||
122 | void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, | ||
123 | int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height); | ||
124 | |||
125 | /* minimum alignment rules ;) | ||
126 | If you notice errors in the align stuff, need more alignment for some ASM code | ||
127 | for some CPU or need to use a function with less aligned data then send a mail | ||
128 | to the ffmpeg-devel mailing list, ... | ||
129 | |||
130 | !warning These alignments might not match reality, (missing attribute((align)) | ||
131 | stuff somewhere possible). | ||
132 | I (Michael) did not check them, these are just the alignments which I think | ||
133 | could be reached easily ... | ||
134 | |||
135 | !future video codecs might need functions with less strict alignment | ||
136 | */ | ||
137 | |||
138 | /* | ||
139 | void get_pixels_c(DCTELEM *block, const uint8_t *pixels, int line_size); | ||
140 | void diff_pixels_c(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride); | ||
141 | void put_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size); | ||
142 | void add_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size); | ||
143 | void clear_blocks_c(DCTELEM *blocks); | ||
144 | */ | ||
145 | |||
146 | /* add and put pixel (decoding) */ | ||
147 | // blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16 | ||
148 | //h for op_pixels_func is limited to {width/2, width} but never larger than 16 and never smaller then 4 | ||
149 | typedef void (*op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int h); | ||
150 | typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h); | ||
151 | typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride); | ||
152 | typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y); | ||
153 | |||
154 | typedef void (*op_fill_func)(uint8_t *block/*align width (8 or 16)*/, uint8_t value, int line_size, int h); | ||
155 | |||
156 | #define DEF_OLD_QPEL(name)\ | ||
157 | void ff_put_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\ | ||
158 | void ff_put_no_rnd_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\ | ||
159 | void ff_avg_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride); | ||
160 | |||
161 | DEF_OLD_QPEL(qpel16_mc11_old_c) | ||
162 | DEF_OLD_QPEL(qpel16_mc31_old_c) | ||
163 | DEF_OLD_QPEL(qpel16_mc12_old_c) | ||
164 | DEF_OLD_QPEL(qpel16_mc32_old_c) | ||
165 | DEF_OLD_QPEL(qpel16_mc13_old_c) | ||
166 | DEF_OLD_QPEL(qpel16_mc33_old_c) | ||
167 | DEF_OLD_QPEL(qpel8_mc11_old_c) | ||
168 | DEF_OLD_QPEL(qpel8_mc31_old_c) | ||
169 | DEF_OLD_QPEL(qpel8_mc12_old_c) | ||
170 | DEF_OLD_QPEL(qpel8_mc32_old_c) | ||
171 | DEF_OLD_QPEL(qpel8_mc13_old_c) | ||
172 | DEF_OLD_QPEL(qpel8_mc33_old_c) | ||
173 | |||
174 | #define CALL_2X_PIXELS(a, b, n)\ | ||
175 | static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | ||
176 | b(block , pixels , line_size, h);\ | ||
177 | b(block+n, pixels+n, line_size, h);\ | ||
178 | } | ||
179 | |||
180 | /* motion estimation */ | ||
181 | // h is limited to {width/2, width, 2*width} but never larger than 16 and never smaller then 2 | ||
182 | // although currently h<4 is not used as functions with width <8 are neither used nor implemented | ||
183 | typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/; | ||
184 | |||
185 | /** | ||
186 | * Scantable. | ||
187 | */ | ||
188 | typedef struct ScanTable{ | ||
189 | const uint8_t *scantable; | ||
190 | uint8_t permutated[64]; | ||
191 | uint8_t raster_end[64]; | ||
192 | #if ARCH_PPC | ||
193 | /** Used by dct_quantize_altivec to find last-non-zero */ | ||
194 | DECLARE_ALIGNED(16, uint8_t, inverse)[64]; | ||
195 | #endif | ||
196 | } ScanTable; | ||
197 | |||
198 | void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable); | ||
199 | |||
200 | void ff_emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize, | ||
201 | int block_w, int block_h, | ||
202 | int src_x, int src_y, int w, int h); | ||
203 | |||
204 | /** | ||
205 | * DSPContext. | ||
206 | */ | ||
207 | typedef struct DSPContext { | ||
208 | /* pixel ops : interface with DCT */ | ||
209 | void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size); | ||
210 | void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride); | ||
211 | void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); | ||
212 | void (*put_signed_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); | ||
213 | void (*put_pixels_nonclamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); | ||
214 | void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); | ||
215 | void (*add_pixels8)(uint8_t *pixels, DCTELEM *block, int line_size); | ||
216 | void (*add_pixels4)(uint8_t *pixels, DCTELEM *block, int line_size); | ||
217 | int (*sum_abs_dctelem)(DCTELEM *block/*align 16*/); | ||
218 | /** | ||
219 | * translational global motion compensation. | ||
220 | */ | ||
221 | void (*gmc1)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder); | ||
222 | /** | ||
223 | * global motion compensation. | ||
224 | */ | ||
225 | void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy, | ||
226 | int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height); | ||
227 | void (*clear_block)(DCTELEM *block/*align 16*/); | ||
228 | void (*clear_blocks)(DCTELEM *blocks/*align 16*/); | ||
229 | int (*pix_sum)(uint8_t * pix, int line_size); | ||
230 | int (*pix_norm1)(uint8_t * pix, int line_size); | ||
231 | // 16x16 8x8 4x4 2x2 16x8 8x4 4x2 8x16 4x8 2x4 | ||
232 | |||
233 | me_cmp_func sad[6]; /* identical to pix_absAxA except additional void * */ | ||
234 | me_cmp_func sse[6]; | ||
235 | me_cmp_func hadamard8_diff[6]; | ||
236 | me_cmp_func dct_sad[6]; | ||
237 | me_cmp_func quant_psnr[6]; | ||
238 | me_cmp_func bit[6]; | ||
239 | me_cmp_func rd[6]; | ||
240 | me_cmp_func vsad[6]; | ||
241 | me_cmp_func vsse[6]; | ||
242 | me_cmp_func nsse[6]; | ||
243 | me_cmp_func w53[6]; | ||
244 | me_cmp_func w97[6]; | ||
245 | me_cmp_func dct_max[6]; | ||
246 | me_cmp_func dct264_sad[6]; | ||
247 | |||
248 | me_cmp_func me_pre_cmp[6]; | ||
249 | me_cmp_func me_cmp[6]; | ||
250 | me_cmp_func me_sub_cmp[6]; | ||
251 | me_cmp_func mb_cmp[6]; | ||
252 | me_cmp_func ildct_cmp[6]; //only width 16 used | ||
253 | me_cmp_func frame_skip_cmp[6]; //only width 8 used | ||
254 | |||
255 | int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2, | ||
256 | int size); | ||
257 | |||
258 | /** | ||
259 | * Halfpel motion compensation with rounding (a+b+1)>>1. | ||
260 | * this is an array[4][4] of motion compensation functions for 4 | ||
261 | * horizontal blocksizes (8,16) and the 4 halfpel positions<br> | ||
262 | * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ] | ||
263 | * @param block destination where the result is stored | ||
264 | * @param pixels source | ||
265 | * @param line_size number of bytes in a horizontal line of block | ||
266 | * @param h height | ||
267 | */ | ||
268 | op_pixels_func put_pixels_tab[4][4]; | ||
269 | |||
270 | /** | ||
271 | * Halfpel motion compensation with rounding (a+b+1)>>1. | ||
272 | * This is an array[4][4] of motion compensation functions for 4 | ||
273 | * horizontal blocksizes (8,16) and the 4 halfpel positions<br> | ||
274 | * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ] | ||
275 | * @param block destination into which the result is averaged (a+b+1)>>1 | ||
276 | * @param pixels source | ||
277 | * @param line_size number of bytes in a horizontal line of block | ||
278 | * @param h height | ||
279 | */ | ||
280 | op_pixels_func avg_pixels_tab[4][4]; | ||
281 | |||
282 | /** | ||
283 | * Halfpel motion compensation with no rounding (a+b)>>1. | ||
284 | * this is an array[2][4] of motion compensation functions for 2 | ||
285 | * horizontal blocksizes (8,16) and the 4 halfpel positions<br> | ||
286 | * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ] | ||
287 | * @param block destination where the result is stored | ||
288 | * @param pixels source | ||
289 | * @param line_size number of bytes in a horizontal line of block | ||
290 | * @param h height | ||
291 | */ | ||
292 | op_pixels_func put_no_rnd_pixels_tab[4][4]; | ||
293 | |||
294 | /** | ||
295 | * Halfpel motion compensation with no rounding (a+b)>>1. | ||
296 | * this is an array[2][4] of motion compensation functions for 2 | ||
297 | * horizontal blocksizes (8,16) and the 4 halfpel positions<br> | ||
298 | * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ] | ||
299 | * @param block destination into which the result is averaged (a+b)>>1 | ||
300 | * @param pixels source | ||
301 | * @param line_size number of bytes in a horizontal line of block | ||
302 | * @param h height | ||
303 | */ | ||
304 | op_pixels_func avg_no_rnd_pixels_tab[4][4]; | ||
305 | |||
306 | void (*put_no_rnd_pixels_l2[2])(uint8_t *block/*align width (8 or 16)*/, const uint8_t *a/*align 1*/, const uint8_t *b/*align 1*/, int line_size, int h); | ||
307 | |||
308 | /** | ||
309 | * Thirdpel motion compensation with rounding (a+b+1)>>1. | ||
310 | * this is an array[12] of motion compensation functions for the 9 thirdpe | ||
311 | * positions<br> | ||
312 | * *pixels_tab[ xthirdpel + 4*ythirdpel ] | ||
313 | * @param block destination where the result is stored | ||
314 | * @param pixels source | ||
315 | * @param line_size number of bytes in a horizontal line of block | ||
316 | * @param h height | ||
317 | */ | ||
318 | tpel_mc_func put_tpel_pixels_tab[11]; //FIXME individual func ptr per width? | ||
319 | tpel_mc_func avg_tpel_pixels_tab[11]; //FIXME individual func ptr per width? | ||
320 | |||
321 | qpel_mc_func put_qpel_pixels_tab[2][16]; | ||
322 | qpel_mc_func avg_qpel_pixels_tab[2][16]; | ||
323 | qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16]; | ||
324 | qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16]; | ||
325 | qpel_mc_func put_mspel_pixels_tab[8]; | ||
326 | |||
327 | /** | ||
328 | * h264 Chroma MC | ||
329 | */ | ||
330 | h264_chroma_mc_func put_h264_chroma_pixels_tab[3]; | ||
331 | h264_chroma_mc_func avg_h264_chroma_pixels_tab[3]; | ||
332 | /* This is really one func used in VC-1 decoding */ | ||
333 | h264_chroma_mc_func put_no_rnd_vc1_chroma_pixels_tab[3]; | ||
334 | h264_chroma_mc_func avg_no_rnd_vc1_chroma_pixels_tab[3]; | ||
335 | |||
336 | qpel_mc_func put_h264_qpel_pixels_tab[4][16]; | ||
337 | qpel_mc_func avg_h264_qpel_pixels_tab[4][16]; | ||
338 | |||
339 | qpel_mc_func put_2tap_qpel_pixels_tab[4][16]; | ||
340 | qpel_mc_func avg_2tap_qpel_pixels_tab[4][16]; | ||
341 | |||
342 | me_cmp_func pix_abs[2][4]; | ||
343 | |||
344 | /* huffyuv specific */ | ||
345 | void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w); | ||
346 | void (*add_bytes_l2)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 16*/, int w); | ||
347 | void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w); | ||
348 | /** | ||
349 | * subtract huffyuv's variant of median prediction | ||
350 | * note, this might read from src1[-1], src2[-1] | ||
351 | */ | ||
352 | void (*sub_hfyu_median_prediction)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top); | ||
353 | void (*add_hfyu_median_prediction)(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top); | ||
354 | int (*add_hfyu_left_prediction)(uint8_t *dst, const uint8_t *src, int w, int left); | ||
355 | void (*add_hfyu_left_prediction_bgr32)(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha); | ||
356 | /* this might write to dst[w] */ | ||
357 | void (*add_png_paeth_prediction)(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp); | ||
358 | void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w); | ||
359 | |||
360 | void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale); | ||
361 | void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale); | ||
362 | |||
363 | void (*h261_loop_filter)(uint8_t *src, int stride); | ||
364 | |||
365 | void (*x8_v_loop_filter)(uint8_t *src, int stride, int qscale); | ||
366 | void (*x8_h_loop_filter)(uint8_t *src, int stride, int qscale); | ||
367 | |||
368 | void (*vp3_idct_dc_add)(uint8_t *dest/*align 8*/, int line_size, const DCTELEM *block/*align 16*/); | ||
369 | void (*vp3_v_loop_filter)(uint8_t *src, int stride, int *bounding_values); | ||
370 | void (*vp3_h_loop_filter)(uint8_t *src, int stride, int *bounding_values); | ||
371 | |||
372 | void (*vp6_filter_diag4)(uint8_t *dst, uint8_t *src, int stride, | ||
373 | const int16_t *h_weights,const int16_t *v_weights); | ||
374 | |||
375 | /* assume len is a multiple of 4, and arrays are 16-byte aligned */ | ||
376 | void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize); | ||
377 | void (*ac3_downmix)(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len); | ||
378 | /* no alignment needed */ | ||
379 | void (*lpc_compute_autocorr)(const int32_t *data, int len, int lag, double *autoc); | ||
380 | /* assume len is a multiple of 8, and arrays are 16-byte aligned */ | ||
381 | void (*vector_fmul)(float *dst, const float *src, int len); | ||
382 | void (*vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len); | ||
383 | /* assume len is a multiple of 8, and src arrays are 16-byte aligned */ | ||
384 | void (*vector_fmul_add)(float *dst, const float *src0, const float *src1, const float *src2, int len); | ||
385 | /* assume len is a multiple of 4, and arrays are 16-byte aligned */ | ||
386 | void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len); | ||
387 | /* assume len is a multiple of 8, and arrays are 16-byte aligned */ | ||
388 | void (*int32_to_float_fmul_scalar)(float *dst, const int *src, float mul, int len); | ||
389 | void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */); | ||
390 | /** | ||
391 | * Multiply a vector of floats by a scalar float. Source and | ||
392 | * destination vectors must overlap exactly or not at all. | ||
393 | * @param dst result vector, 16-byte aligned | ||
394 | * @param src input vector, 16-byte aligned | ||
395 | * @param mul scalar value | ||
396 | * @param len length of vector, multiple of 4 | ||
397 | */ | ||
398 | void (*vector_fmul_scalar)(float *dst, const float *src, float mul, | ||
399 | int len); | ||
400 | /** | ||
401 | * Multiply a vector of floats by concatenated short vectors of | ||
402 | * floats and by a scalar float. Source and destination vectors | ||
403 | * must overlap exactly or not at all. | ||
404 | * [0]: short vectors of length 2, 8-byte aligned | ||
405 | * [1]: short vectors of length 4, 16-byte aligned | ||
406 | * @param dst output vector, 16-byte aligned | ||
407 | * @param src input vector, 16-byte aligned | ||
408 | * @param sv array of pointers to short vectors | ||
409 | * @param mul scalar value | ||
410 | * @param len number of elements in src and dst, multiple of 4 | ||
411 | */ | ||
412 | void (*vector_fmul_sv_scalar[2])(float *dst, const float *src, | ||
413 | const float **sv, float mul, int len); | ||
414 | /** | ||
415 | * Multiply short vectors of floats by a scalar float, store | ||
416 | * concatenated result. | ||
417 | * [0]: short vectors of length 2, 8-byte aligned | ||
418 | * [1]: short vectors of length 4, 16-byte aligned | ||
419 | * @param dst output vector, 16-byte aligned | ||
420 | * @param sv array of pointers to short vectors | ||
421 | * @param mul scalar value | ||
422 | * @param len number of output elements, multiple of 4 | ||
423 | */ | ||
424 | void (*sv_fmul_scalar[2])(float *dst, const float **sv, | ||
425 | float mul, int len); | ||
426 | /** | ||
427 | * Calculate the scalar product of two vectors of floats. | ||
428 | * @param v1 first vector, 16-byte aligned | ||
429 | * @param v2 second vector, 16-byte aligned | ||
430 | * @param len length of vectors, multiple of 4 | ||
431 | */ | ||
432 | float (*scalarproduct_float)(const float *v1, const float *v2, int len); | ||
433 | /** | ||
434 | * Calculate the sum and difference of two vectors of floats. | ||
435 | * @param v1 first input vector, sum output, 16-byte aligned | ||
436 | * @param v2 second input vector, difference output, 16-byte aligned | ||
437 | * @param len length of vectors, multiple of 4 | ||
438 | */ | ||
439 | void (*butterflies_float)(float *restrict v1, float *restrict v2, int len); | ||
440 | |||
441 | /* C version: convert floats from the range [384.0,386.0] to ints in [-32768,32767] | ||
442 | * simd versions: convert floats from [-32768.0,32767.0] without rescaling and arrays are 16byte aligned */ | ||
443 | void (*float_to_int16)(int16_t *dst, const float *src, long len); | ||
444 | void (*float_to_int16_interleave)(int16_t *dst, const float **src, long len, int channels); | ||
445 | |||
446 | /* (I)DCT */ | ||
447 | void (*fdct)(DCTELEM *block/* align 16*/); | ||
448 | void (*fdct248)(DCTELEM *block/* align 16*/); | ||
449 | |||
450 | /* IDCT really*/ | ||
451 | void (*idct)(DCTELEM *block/* align 16*/); | ||
452 | |||
453 | /** | ||
454 | * block -> idct -> clip to unsigned 8 bit -> dest. | ||
455 | * (-1392, 0, 0, ...) -> idct -> (-174, -174, ...) -> put -> (0, 0, ...) | ||
456 | * @param line_size size in bytes of a horizontal line of dest | ||
457 | */ | ||
458 | void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); | ||
459 | |||
460 | /** | ||
461 | * block -> idct -> add dest -> clip to unsigned 8 bit -> dest. | ||
462 | * @param line_size size in bytes of a horizontal line of dest | ||
463 | */ | ||
464 | void (*idct_add)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); | ||
465 | |||
466 | /** | ||
467 | * idct input permutation. | ||
468 | * several optimized IDCTs need a permutated input (relative to the normal order of the reference | ||
469 | * IDCT) | ||
470 | * this permutation must be performed before the idct_put/add, note, normally this can be merged | ||
471 | * with the zigzag/alternate scan<br> | ||
472 | * an example to avoid confusion: | ||
473 | * - (->decode coeffs -> zigzag reorder -> dequant -> reference idct ->...) | ||
474 | * - (x -> referece dct -> reference idct -> x) | ||
475 | * - (x -> referece dct -> simple_mmx_perm = idct_permutation -> simple_idct_mmx -> x) | ||
476 | * - (->decode coeffs -> zigzag reorder -> simple_mmx_perm -> dequant -> simple_idct_mmx ->...) | ||
477 | */ | ||
478 | uint8_t idct_permutation[64]; | ||
479 | int idct_permutation_type; | ||
480 | #define FF_NO_IDCT_PERM 1 | ||
481 | #define FF_LIBMPEG2_IDCT_PERM 2 | ||
482 | #define FF_SIMPLE_IDCT_PERM 3 | ||
483 | #define FF_TRANSPOSE_IDCT_PERM 4 | ||
484 | #define FF_PARTTRANS_IDCT_PERM 5 | ||
485 | #define FF_SSE2_IDCT_PERM 6 | ||
486 | |||
487 | int (*try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale); | ||
488 | void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale); | ||
489 | #define BASIS_SHIFT 16 | ||
490 | #define RECON_SHIFT 6 | ||
491 | |||
492 | void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w); | ||
493 | #define EDGE_WIDTH 16 | ||
494 | |||
495 | void (*prefetch)(void *mem, int stride, int h); | ||
496 | |||
497 | void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); | ||
498 | |||
499 | /* mlp/truehd functions */ | ||
500 | void (*mlp_filter_channel)(int32_t *state, const int32_t *coeff, | ||
501 | int firorder, int iirorder, | ||
502 | unsigned int filter_shift, int32_t mask, int blocksize, | ||
503 | int32_t *sample_buffer); | ||
504 | |||
505 | /* vc1 functions */ | ||
506 | void (*vc1_inv_trans_8x8)(DCTELEM *b); | ||
507 | void (*vc1_inv_trans_8x4)(uint8_t *dest, int line_size, DCTELEM *block); | ||
508 | void (*vc1_inv_trans_4x8)(uint8_t *dest, int line_size, DCTELEM *block); | ||
509 | void (*vc1_inv_trans_4x4)(uint8_t *dest, int line_size, DCTELEM *block); | ||
510 | void (*vc1_inv_trans_8x8_dc)(uint8_t *dest, int line_size, DCTELEM *block); | ||
511 | void (*vc1_inv_trans_8x4_dc)(uint8_t *dest, int line_size, DCTELEM *block); | ||
512 | void (*vc1_inv_trans_4x8_dc)(uint8_t *dest, int line_size, DCTELEM *block); | ||
513 | void (*vc1_inv_trans_4x4_dc)(uint8_t *dest, int line_size, DCTELEM *block); | ||
514 | void (*vc1_v_overlap)(uint8_t* src, int stride); | ||
515 | void (*vc1_h_overlap)(uint8_t* src, int stride); | ||
516 | void (*vc1_v_loop_filter4)(uint8_t *src, int stride, int pq); | ||
517 | void (*vc1_h_loop_filter4)(uint8_t *src, int stride, int pq); | ||
518 | void (*vc1_v_loop_filter8)(uint8_t *src, int stride, int pq); | ||
519 | void (*vc1_h_loop_filter8)(uint8_t *src, int stride, int pq); | ||
520 | void (*vc1_v_loop_filter16)(uint8_t *src, int stride, int pq); | ||
521 | void (*vc1_h_loop_filter16)(uint8_t *src, int stride, int pq); | ||
522 | /* put 8x8 block with bicubic interpolation and quarterpel precision | ||
523 | * last argument is actually round value instead of height | ||
524 | */ | ||
525 | op_pixels_func put_vc1_mspel_pixels_tab[16]; | ||
526 | op_pixels_func avg_vc1_mspel_pixels_tab[16]; | ||
527 | |||
528 | /* intrax8 functions */ | ||
529 | void (*x8_spatial_compensation[12])(uint8_t *src , uint8_t *dst, int linesize); | ||
530 | void (*x8_setup_spatial_compensation)(uint8_t *src, uint8_t *dst, int linesize, | ||
531 | int * range, int * sum, int edges); | ||
532 | |||
533 | /** | ||
534 | * Calculate scalar product of two vectors. | ||
535 | * @param len length of vectors, should be multiple of 16 | ||
536 | * @param shift number of bits to discard from product | ||
537 | */ | ||
538 | int32_t (*scalarproduct_int16)(const int16_t *v1, const int16_t *v2/*align 16*/, int len, int shift); | ||
539 | /* ape functions */ | ||
540 | /** | ||
541 | * Calculate scalar product of v1 and v2, | ||
542 | * and v1[i] += v3[i] * mul | ||
543 | * @param len length of vectors, should be multiple of 16 | ||
544 | */ | ||
545 | int32_t (*scalarproduct_and_madd_int16)(int16_t *v1/*align 16*/, const int16_t *v2, const int16_t *v3, int len, int mul); | ||
546 | |||
547 | /* rv30 functions */ | ||
548 | qpel_mc_func put_rv30_tpel_pixels_tab[4][16]; | ||
549 | qpel_mc_func avg_rv30_tpel_pixels_tab[4][16]; | ||
550 | |||
551 | /* rv40 functions */ | ||
552 | qpel_mc_func put_rv40_qpel_pixels_tab[4][16]; | ||
553 | qpel_mc_func avg_rv40_qpel_pixels_tab[4][16]; | ||
554 | h264_chroma_mc_func put_rv40_chroma_pixels_tab[3]; | ||
555 | h264_chroma_mc_func avg_rv40_chroma_pixels_tab[3]; | ||
556 | |||
557 | /* bink functions */ | ||
558 | op_fill_func fill_block_tab[2]; | ||
559 | void (*scale_block)(const uint8_t src[64]/*align 8*/, uint8_t *dst/*align 8*/, int linesize); | ||
560 | } DSPContext; | ||
561 | |||
562 | void dsputil_static_init(void); | ||
563 | void dsputil_init(DSPContext* p, AVCodecContext *avctx); | ||
564 | |||
565 | int ff_check_alignment(void); | ||
566 | |||
567 | /** | ||
568 | * permute block according to permuatation. | ||
569 | * @param last last non zero element in scantable order | ||
570 | */ | ||
571 | void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last); | ||
572 | |||
573 | void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type); | ||
574 | |||
575 | #define BYTE_VEC32(c) ((c)*0x01010101UL) | ||
576 | |||
577 | static inline uint32_t rnd_avg32(uint32_t a, uint32_t b) | ||
578 | { | ||
579 | return (a | b) - (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1); | ||
580 | } | ||
581 | |||
582 | static inline uint32_t no_rnd_avg32(uint32_t a, uint32_t b) | ||
583 | { | ||
584 | return (a & b) + (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1); | ||
585 | } | ||
586 | |||
587 | static inline int get_penalty_factor(int lambda, int lambda2, int type){ | ||
588 | switch(type&0xFF){ | ||
589 | default: | ||
590 | case FF_CMP_SAD: | ||
591 | return lambda>>FF_LAMBDA_SHIFT; | ||
592 | case FF_CMP_DCT: | ||
593 | return (3*lambda)>>(FF_LAMBDA_SHIFT+1); | ||
594 | case FF_CMP_W53: | ||
595 | return (4*lambda)>>(FF_LAMBDA_SHIFT); | ||
596 | case FF_CMP_W97: | ||
597 | return (2*lambda)>>(FF_LAMBDA_SHIFT); | ||
598 | case FF_CMP_SATD: | ||
599 | case FF_CMP_DCT264: | ||
600 | return (2*lambda)>>FF_LAMBDA_SHIFT; | ||
601 | case FF_CMP_RD: | ||
602 | case FF_CMP_PSNR: | ||
603 | case FF_CMP_SSE: | ||
604 | case FF_CMP_NSSE: | ||
605 | return lambda2>>FF_LAMBDA_SHIFT; | ||
606 | case FF_CMP_BIT: | ||
607 | return 1; | ||
608 | } | ||
609 | } | ||
610 | |||
/**
 * Empty MMX state.
 * This must be called between any dsp function and float/double code,
 * for example: sin(); dsp->idct_put(); emms_c(); cos();
 * Default is a no-op; redefined below on targets that need it (x86 MMX).
 */
#define emms_c()

/* should be defined by architectures supporting
   one or more MultiMedia extension */
int mm_support(void);
extern int mm_flags;
622 | |||
623 | void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx); | ||
624 | void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx); | ||
625 | void dsputil_init_bfin(DSPContext* c, AVCodecContext *avctx); | ||
626 | void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx); | ||
627 | void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx); | ||
628 | void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx); | ||
629 | void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx); | ||
630 | void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx); | ||
631 | void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx); | ||
632 | |||
633 | void ff_dsputil_init_dwt(DSPContext *c); | ||
634 | void ff_rv30dsp_init(DSPContext* c, AVCodecContext *avctx); | ||
635 | void ff_rv40dsp_init(DSPContext* c, AVCodecContext *avctx); | ||
636 | void ff_vc1dsp_init(DSPContext* c, AVCodecContext *avctx); | ||
637 | void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx); | ||
638 | void ff_mlp_init(DSPContext* c, AVCodecContext *avctx); | ||
639 | void ff_mlp_init_x86(DSPContext* c, AVCodecContext *avctx); | ||
640 | |||
/* Per-architecture configuration: emms_c() handling and the stride
   alignment required by the optimized DSP routines. */
#if HAVE_MMX

#undef emms_c

/* Execute the x86 EMMS instruction so subsequent x87 floating-point
   code sees a clean register state; the "memory" clobber keeps the
   compiler from reordering memory accesses around it. */
static inline void emms(void)
{
    __asm__ volatile ("emms;":::"memory");
}

/* Only issue EMMS when the CPU actually reports MMX support. */
#define emms_c() \
{\
    if (mm_flags & FF_MM_MMX)\
        emms();\
}

#elif ARCH_ARM

#if HAVE_NEON
# define STRIDE_ALIGN 16
#endif

#elif ARCH_PPC

#define STRIDE_ALIGN 16

#elif HAVE_MMI

#define STRIDE_ALIGN 16

#else

/* No SIMD extensions available: report no CPU feature flags. */
#define mm_flags 0
#define mm_support() 0

#endif

/* Fallback alignment for architectures that did not set one above. */
#ifndef STRIDE_ALIGN
# define STRIDE_ALIGN 8
#endif
681 | |||
/* Declare a local array 'v' of element type 't' and shape 's'/VA_ARGS,
   aligned to 'a' bytes: over-allocate raw bytes by 'a' and round the
   pointer up, since plain stack storage carries no alignment guarantee. */
#define LOCAL_ALIGNED(a, t, v, s, ...) \
uint8_t la_##v[sizeof(t s __VA_ARGS__) + (a)]; \
t (*v) __VA_ARGS__ = (void *)FFALIGN((uintptr_t)la_##v, a)

/* Where the compiler can align locals directly (HAVE_LOCAL_ALIGNED_8),
   use DECLARE_ALIGNED; otherwise fall back to the manual scheme above. */
#if HAVE_LOCAL_ALIGNED_8
# define LOCAL_ALIGNED_8(t, v, s, ...) DECLARE_ALIGNED(8, t, v) s __VA_ARGS__
#else
# define LOCAL_ALIGNED_8(t, v, s, ...) LOCAL_ALIGNED(8, t, v, s, __VA_ARGS__)
#endif

/* Same as above for 16-byte alignment. */
#if HAVE_LOCAL_ALIGNED_16
# define LOCAL_ALIGNED_16(t, v, s, ...) DECLARE_ALIGNED(16, t, v) s __VA_ARGS__
#else
# define LOCAL_ALIGNED_16(t, v, s, ...) LOCAL_ALIGNED(16, t, v, s, __VA_ARGS__)
#endif
697 | |||
698 | /* PSNR */ | ||
699 | void get_psnr(uint8_t *orig_image[3], uint8_t *coded_image[3], | ||
700 | int orig_linesize[3], int coded_linesize, | ||
701 | AVCodecContext *avctx); | ||
702 | |||
/* Build a 16-pixel-wide compare function 'name16' from the 8-pixel-wide
   'name8' by summing the scores of the left and right 8-pixel halves. */
#define WRAPPER8_16(name8, name16)\
static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
    return name8(s, dst , src , stride, h)\
          +name8(s, dst+8 , src+8 , stride, h);\
}

/* Same idea for square blocks: score the two top 8x8 quadrants, and when
   h==16 also the two bottom quadrants (advancing by 8 rows first). */
#define WRAPPER8_16_SQ(name8, name16)\
static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
    int score=0;\
    score +=name8(s, dst , src , stride, 8);\
    score +=name8(s, dst+8 , src+8 , stride, 8);\
    if(h==16){\
        dst += 8*stride;\
        src += 8*stride;\
        score +=name8(s, dst , src , stride, 8);\
        score +=name8(s, dst+8 , src+8 , stride, 8);\
    }\
    return score;\
}
722 | |||
723 | |||
/**
 * Copy a 2-pixel-wide block of h rows from src to dst.
 * Each row is moved as a single unaligned 16-bit load/store.
 */
static inline void copy_block2(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    while (h-- > 0) {
        AV_WN16(dst, AV_RN16(src));
        dst += dstStride;
        src += srcStride;
    }
}
734 | |||
/**
 * Copy a 4-pixel-wide block of h rows from src to dst.
 * Each row is moved as a single unaligned 32-bit load/store.
 */
static inline void copy_block4(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    while (h-- > 0) {
        AV_WN32(dst, AV_RN32(src));
        dst += dstStride;
        src += srcStride;
    }
}
745 | |||
/**
 * Copy an 8-pixel-wide block of h rows from src to dst.
 * Each row is moved as two unaligned 32-bit loads/stores.
 */
static inline void copy_block8(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    while (h-- > 0) {
        AV_WN32(dst,     AV_RN32(src));
        AV_WN32(dst + 4, AV_RN32(src + 4));
        dst += dstStride;
        src += srcStride;
    }
}
757 | |||
/**
 * Copy a 9-pixel-wide block of h rows from src to dst
 * (8 bytes via two 32-bit words, plus one trailing byte per row).
 */
static inline void copy_block9(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    while (h-- > 0) {
        AV_WN32(dst,     AV_RN32(src));
        AV_WN32(dst + 4, AV_RN32(src + 4));
        dst[8] = src[8];
        dst += dstStride;
        src += srcStride;
    }
}
770 | |||
/**
 * Copy a 16-pixel-wide block of h rows from src to dst.
 * Each row is moved as four unaligned 32-bit loads/stores.
 */
static inline void copy_block16(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    while (h-- > 0) {
        AV_WN32(dst,      AV_RN32(src));
        AV_WN32(dst + 4,  AV_RN32(src + 4));
        AV_WN32(dst + 8,  AV_RN32(src + 8));
        AV_WN32(dst + 12, AV_RN32(src + 12));
        dst += dstStride;
        src += srcStride;
    }
}
784 | |||
/**
 * Copy a 17-pixel-wide block of h rows from src to dst
 * (16 bytes via four 32-bit words, plus one trailing byte per row).
 */
static inline void copy_block17(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    while (h-- > 0) {
        AV_WN32(dst,      AV_RN32(src));
        AV_WN32(dst + 4,  AV_RN32(src + 4));
        AV_WN32(dst + 8,  AV_RN32(src + 8));
        AV_WN32(dst + 12, AV_RN32(src + 12));
        dst[16] = src[16];
        dst += dstStride;
        src += srcStride;
    }
}
799 | |||
800 | #endif /* AVCODEC_DSPUTIL_H */ | ||