diff options
Diffstat (limited to 'apps/codecs')
-rw-r--r-- | apps/codecs/libatrac/Makefile.test | 6 | ||||
-rw-r--r-- | apps/codecs/libatrac/README.rockbox | 10 | ||||
-rw-r--r-- | apps/codecs/libatrac/atrac3.c | 208 | ||||
-rw-r--r-- | apps/codecs/libatrac/atrac3data_fixed.h | 145 | ||||
-rw-r--r-- | apps/codecs/libatrac/dsputil.c | 4114 | ||||
-rw-r--r-- | apps/codecs/libatrac/dsputil.h | 898 | ||||
-rw-r--r-- | apps/codecs/libatrac/fft.c | 374 | ||||
-rw-r--r-- | apps/codecs/libatrac/fixp_math.c | 66 | ||||
-rw-r--r-- | apps/codecs/libatrac/fixp_math.h | 14 | ||||
-rw-r--r-- | apps/codecs/libatrac/mdct.c | 245 |
10 files changed, 324 insertions, 5756 deletions
diff --git a/apps/codecs/libatrac/Makefile.test b/apps/codecs/libatrac/Makefile.test index 56b50b90fa..d1d35860e0 100644 --- a/apps/codecs/libatrac/Makefile.test +++ b/apps/codecs/libatrac/Makefile.test | |||
@@ -1,8 +1,8 @@ | |||
1 | CFLAGS = -Wall -O3 -DTEST -D"DEBUGF=printf" | 1 | CFLAGS = -Wall -O3 -DTEST -D"DEBUGF=printf" -D"ROCKBOX_LITTLE_ENDIAN=1" -D"ICONST_ATTR=" -D"ICODE_ATTR=" |
2 | OBJS = atrac3.o dsputil.o bitstream.o fft.o mdct.o libavutil/log.o libavutil/mem.o ../librm/rm.o | 2 | OBJS = atrac3.o bitstream.o libavutil/log.o libavutil/mem.o ../librm/rm.o fixp_math.o ../lib/mdct2.o ../lib/mdct_lookup.o |
3 | 3 | ||
4 | atractest: $(OBJS) | 4 | atractest: $(OBJS) |
5 | gcc -o atractest $(OBJS) -lm | 5 | gcc -o atractest $(OBJS) |
6 | 6 | ||
7 | .c.o : | 7 | .c.o : |
8 | $(CC) $(CFLAGS) -c -o $@ $< | 8 | $(CC) $(CFLAGS) -c -o $@ $< |
diff --git a/apps/codecs/libatrac/README.rockbox b/apps/codecs/libatrac/README.rockbox index 7f62e10bb4..46738e7d7b 100644 --- a/apps/codecs/libatrac/README.rockbox +++ b/apps/codecs/libatrac/README.rockbox | |||
@@ -8,13 +8,19 @@ ffmpeg is licensed under the Lesser GNU General Public License. | |||
8 | 8 | ||
9 | IMPORT DETAILS | 9 | IMPORT DETAILS |
10 | 10 | ||
11 | The decoder is based on ffmpeg-svn r18079. It still uses floating | 11 | The decoder is based on ffmpeg-svn r18079. |
12 | point math and not suitable to be used in rockbox. | 12 | The decoder had been modified to use fixed-point arithmetic. |
13 | 13 | ||
14 | TESTING | 14 | TESTING |
15 | 15 | ||
16 | The test program should compile in any Unix-like environment using the | 16 | The test program should compile in any Unix-like environment using the |
17 | command "make -f Makefile.test". | 17 | command "make -f Makefile.test". |
18 | 18 | ||
19 | For ARM targets add -DCPU_ARM to CFLAGS in Makefile.test to make use of | ||
20 | the asm ARM optimisations in rockbox's mdct library. | ||
21 | |||
22 | For Big-endian targets, change -D"ROCKBOX_LITTLE_ENDIAN=1" | ||
23 | to -D"ROCKBOX_BIG_ENDIAN=1" in Makefile.test. | ||
24 | |||
19 | Running "./atractest file.rm" will decode the audio data to a WAV file | 25 | Running "./atractest file.rm" will decode the audio data to a WAV file |
20 | called "output.wav" in the current directory. | 26 | called "output.wav" in the current directory. |
diff --git a/apps/codecs/libatrac/atrac3.c b/apps/codecs/libatrac/atrac3.c index a800511397..838bbca48a 100644 --- a/apps/codecs/libatrac/atrac3.c +++ b/apps/codecs/libatrac/atrac3.c | |||
@@ -38,7 +38,6 @@ | |||
38 | 38 | ||
39 | #include "avcodec.h" | 39 | #include "avcodec.h" |
40 | #include "bitstream.h" | 40 | #include "bitstream.h" |
41 | #include "dsputil.h" | ||
42 | #include "bytestream.h" | 41 | #include "bytestream.h" |
43 | 42 | ||
44 | #include <stdint.h> | 43 | #include <stdint.h> |
@@ -50,6 +49,10 @@ | |||
50 | 49 | ||
51 | #include "../librm/rm.h" | 50 | #include "../librm/rm.h" |
52 | #include "atrac3data.h" | 51 | #include "atrac3data.h" |
52 | #include "atrac3data_fixed.h" | ||
53 | #include "fixp_math.h" | ||
54 | //#include "fixp_mdct.h" | ||
55 | #include "../lib/mdct2.h" | ||
53 | 56 | ||
54 | #define JOINT_STEREO 0x12 | 57 | #define JOINT_STEREO 0x12 |
55 | #define STEREO 0x2 | 58 | #define STEREO 0x2 |
@@ -70,23 +73,23 @@ typedef struct { | |||
70 | typedef struct { | 73 | typedef struct { |
71 | int pos; | 74 | int pos; |
72 | int numCoefs; | 75 | int numCoefs; |
73 | float coef[8]; | 76 | int32_t coef[8]; |
74 | } tonal_component; | 77 | } tonal_component; |
75 | 78 | ||
76 | typedef struct { | 79 | typedef struct { |
77 | int bandsCoded; | 80 | int bandsCoded; |
78 | int numComponents; | 81 | int numComponents; |
79 | tonal_component components[64]; | 82 | tonal_component components[64]; |
80 | float prevFrame[1024]; | 83 | int32_t prevFrame[1024]; |
81 | int gcBlkSwitch; | 84 | int gcBlkSwitch; |
82 | gain_block gainBlock[2]; | 85 | gain_block gainBlock[2]; |
83 | 86 | ||
84 | DECLARE_ALIGNED_16(float, spectrum[1024]); | 87 | int32_t spectrum[1024] __attribute__((aligned(16))); |
85 | DECLARE_ALIGNED_16(float, IMDCT_buf[1024]); | 88 | int32_t IMDCT_buf[1024] __attribute__((aligned(16))); |
86 | 89 | ||
87 | float delayBuf1[46]; ///<qmf delay buffers | 90 | int32_t delayBuf1[46]; ///<qmf delay buffers |
88 | float delayBuf2[46]; | 91 | int32_t delayBuf2[46]; |
89 | float delayBuf3[46]; | 92 | int32_t delayBuf3[46]; |
90 | } channel_unit; | 93 | } channel_unit; |
91 | 94 | ||
92 | typedef struct { | 95 | typedef struct { |
@@ -114,9 +117,9 @@ typedef struct { | |||
114 | //@} | 117 | //@} |
115 | //@{ | 118 | //@{ |
116 | /** data buffers */ | 119 | /** data buffers */ |
117 | float outSamples[2048]; | 120 | int32_t outSamples[2048]; |
118 | uint8_t* decoded_bytes_buffer; | 121 | uint8_t* decoded_bytes_buffer; |
119 | float tempBuf[1070]; | 122 | int32_t tempBuf[1070]; |
120 | //@} | 123 | //@} |
121 | //@{ | 124 | //@{ |
122 | /** extradata */ | 125 | /** extradata */ |
@@ -127,17 +130,8 @@ typedef struct { | |||
127 | //@} | 130 | //@} |
128 | } ATRAC3Context; | 131 | } ATRAC3Context; |
129 | 132 | ||
130 | static DECLARE_ALIGNED_16(float,mdct_window[512]); | 133 | static int32_t qmf_window[48]; |
131 | static float qmf_window[48]; | ||
132 | static VLC spectral_coeff_tab[7]; | 134 | static VLC spectral_coeff_tab[7]; |
133 | static float SFTable[64]; | ||
134 | static float gain_tab1[16]; | ||
135 | static float gain_tab2[31]; | ||
136 | static MDCTContext mdct_ctx; | ||
137 | static DSPContext dsp; | ||
138 | |||
139 | |||
140 | /* quadrature mirror synthesis filter */ | ||
141 | 135 | ||
142 | /** | 136 | /** |
143 | * Quadrature mirror synthesis filter. | 137 | * Quadrature mirror synthesis filter. |
@@ -149,14 +143,12 @@ static DSPContext dsp; | |||
149 | * @param delayBuf delayBuf buffer | 143 | * @param delayBuf delayBuf buffer |
150 | * @param temp temp buffer | 144 | * @param temp temp buffer |
151 | */ | 145 | */ |
152 | 146 | static void iqmf (int32_t *inlo, int32_t *inhi, unsigned int nIn, int32_t *pOut, int32_t *delayBuf, int32_t *temp) | |
153 | |||
154 | static void iqmf (float *inlo, float *inhi, unsigned int nIn, float *pOut, float *delayBuf, float *temp) | ||
155 | { | 147 | { |
156 | int i, j; | 148 | int i, j; |
157 | float *p1, *p3; | 149 | int32_t *p1, *p3; |
158 | 150 | ||
159 | memcpy(temp, delayBuf, 46*sizeof(float)); | 151 | memcpy(temp, delayBuf, 46*sizeof(int32_t)); |
160 | 152 | ||
161 | p3 = temp + 46; | 153 | p3 = temp + 46; |
162 | 154 | ||
@@ -171,12 +163,12 @@ static void iqmf (float *inlo, float *inhi, unsigned int nIn, float *pOut, float | |||
171 | /* loop2 */ | 163 | /* loop2 */ |
172 | p1 = temp; | 164 | p1 = temp; |
173 | for (j = nIn; j != 0; j--) { | 165 | for (j = nIn; j != 0; j--) { |
174 | float s1 = 0.0; | 166 | int32_t s1 = 0; |
175 | float s2 = 0.0; | 167 | int32_t s2 = 0; |
176 | 168 | ||
177 | for (i = 0; i < 48; i += 2) { | 169 | for (i = 0; i < 48; i += 2) { |
178 | s1 += p1[i] * qmf_window[i]; | 170 | s1 += fixmul31(p1[i], qmf_window[i]); |
179 | s2 += p1[i+1] * qmf_window[i+1]; | 171 | s2 += fixmul31(p1[i+1], qmf_window[i+1]); |
180 | } | 172 | } |
181 | 173 | ||
182 | pOut[0] = s2; | 174 | pOut[0] = s2; |
@@ -187,7 +179,7 @@ static void iqmf (float *inlo, float *inhi, unsigned int nIn, float *pOut, float | |||
187 | } | 179 | } |
188 | 180 | ||
189 | /* Update the delay buffer. */ | 181 | /* Update the delay buffer. */ |
190 | memcpy(delayBuf, temp + nIn*2, 46*sizeof(float)); | 182 | memcpy(delayBuf, temp + (nIn << 1), 46*sizeof(int32_t)); |
191 | } | 183 | } |
192 | 184 | ||
193 | /** | 185 | /** |
@@ -199,10 +191,9 @@ static void iqmf (float *inlo, float *inhi, unsigned int nIn, float *pOut, float | |||
199 | * @param odd_band 1 if the band is an odd band | 191 | * @param odd_band 1 if the band is an odd band |
200 | */ | 192 | */ |
201 | 193 | ||
202 | static void IMLT(float *pInput, float *pOutput, int odd_band) | 194 | static void IMLT(int32_t *pInput, int32_t *pOutput, int odd_band) |
203 | { | 195 | { |
204 | int i; | 196 | int i; |
205 | |||
206 | if (odd_band) { | 197 | if (odd_band) { |
207 | /** | 198 | /** |
208 | * Reverse the odd bands before IMDCT, this is an effect of the QMF transform | 199 | * Reverse the odd bands before IMDCT, this is an effect of the QMF transform |
@@ -214,13 +205,15 @@ static void IMLT(float *pInput, float *pOutput, int odd_band) | |||
214 | */ | 205 | */ |
215 | 206 | ||
216 | for (i=0; i<128; i++) | 207 | for (i=0; i<128; i++) |
217 | FFSWAP(float, pInput[i], pInput[255-i]); | 208 | FFSWAP(int32_t, pInput[i], pInput[255-i]); |
218 | } | 209 | } |
210 | |||
211 | /* Apply the imdct. */ | ||
212 | mdct_backward(512, pInput, pOutput); | ||
219 | 213 | ||
220 | ff_imdct_calc(&mdct_ctx,pOutput,pInput); | 214 | /* Windowing. */ |
221 | 215 | for(i = 0; i<512; i++) | |
222 | /* Perform windowing on the output. */ | 216 | pOutput[i] = fixmul31(pOutput[i], window_lookup[i]); |
223 | dsp.vector_fmul(pOutput,mdct_window,512); | ||
224 | 217 | ||
225 | } | 218 | } |
226 | 219 | ||
@@ -259,30 +252,20 @@ static int decode_bytes(const uint8_t* inbuffer, uint8_t* out, int bytes){ | |||
259 | 252 | ||
260 | 253 | ||
261 | static av_cold void init_atrac3_transforms(ATRAC3Context *q) { | 254 | static av_cold void init_atrac3_transforms(ATRAC3Context *q) { |
262 | float enc_window[256]; | 255 | int32_t s; |
263 | float s; | ||
264 | int i; | 256 | int i; |
265 | 257 | ||
266 | /* Generate the mdct window, for details see | 258 | /* Generate the mdct window, for details see |
267 | * http://wiki.multimedia.cx/index.php?title=RealAudio_atrc#Windows */ | 259 | * http://wiki.multimedia.cx/index.php?title=RealAudio_atrc#Windows */ |
268 | for (i=0 ; i<256; i++) | ||
269 | enc_window[i] = (sin(((i + 0.5) / 256.0 - 0.5) * M_PI) + 1.0) * 0.5; | ||
270 | 260 | ||
271 | if (!mdct_window[0]) | 261 | /* mdct window had been generated and saved as a lookup table in atrac3data_fixed.h */ |
272 | for (i=0 ; i<256; i++) { | ||
273 | mdct_window[i] = enc_window[i]/(enc_window[i]*enc_window[i] + enc_window[255-i]*enc_window[255-i]); | ||
274 | mdct_window[511-i] = mdct_window[i]; | ||
275 | } | ||
276 | 262 | ||
277 | /* Generate the QMF window. */ | 263 | /* Generate the QMF window. */ |
278 | for (i=0 ; i<24; i++) { | 264 | for (i=0 ; i<24; i++) { |
279 | s = qmf_48tap_half[i] * 2.0; | 265 | s = qmf_48tap_half_fix[i] << 1; |
280 | qmf_window[i] = s; | 266 | qmf_window[i] = s; |
281 | qmf_window[47 - i] = s; | 267 | qmf_window[47 - i] = s; |
282 | } | 268 | } |
283 | |||
284 | /* Initialize the MDCT transform. */ | ||
285 | ff_mdct_init(&mdct_ctx, 9, 1); | ||
286 | } | 269 | } |
287 | 270 | ||
288 | /** | 271 | /** |
@@ -367,12 +350,12 @@ static void readQuantSpectralCoeffs (GetBitContext *gb, int selector, int coding | |||
367 | * @return outSubbands subband counter, fix for broken specification/files | 350 | * @return outSubbands subband counter, fix for broken specification/files |
368 | */ | 351 | */ |
369 | 352 | ||
370 | static int decodeSpectrum (GetBitContext *gb, float *pOut) | 353 | static int decodeSpectrum (GetBitContext *gb, int32_t *pOut) |
371 | { | 354 | { |
372 | int numSubbands, codingMode, cnt, first, last, subbWidth, *pIn; | 355 | int numSubbands, codingMode, cnt, first, last, subbWidth, *pIn; |
373 | int subband_vlc_index[32], SF_idxs[32]; | 356 | int subband_vlc_index[32], SF_idxs[32]; |
374 | int mantissas[128]; | 357 | int mantissas[128]; |
375 | float SF; | 358 | int32_t SF; |
376 | 359 | ||
377 | numSubbands = get_bits(gb, 5); // number of coded subbands | 360 | numSubbands = get_bits(gb, 5); // number of coded subbands |
378 | codingMode = get_bits1(gb); // coding Mode: 0 - VLC/ 1-CLC | 361 | codingMode = get_bits1(gb); // coding Mode: 0 - VLC/ 1-CLC |
@@ -400,20 +383,20 @@ static int decodeSpectrum (GetBitContext *gb, float *pOut) | |||
400 | readQuantSpectralCoeffs (gb, subband_vlc_index[cnt], codingMode, mantissas, subbWidth); | 383 | readQuantSpectralCoeffs (gb, subband_vlc_index[cnt], codingMode, mantissas, subbWidth); |
401 | 384 | ||
402 | /* Decode the scale factor for this subband. */ | 385 | /* Decode the scale factor for this subband. */ |
403 | SF = SFTable[SF_idxs[cnt]] * iMaxQuant[subband_vlc_index[cnt]]; | 386 | SF = fixmul31(SFTable_fixed[SF_idxs[cnt]], iMaxQuant_fix[subband_vlc_index[cnt]]); |
404 | 387 | ||
405 | /* Inverse quantize the coefficients. */ | 388 | /* Inverse quantize the coefficients. */ |
406 | for (pIn=mantissas ; first<last; first++, pIn++) | 389 | for (pIn=mantissas ; first<last; first++, pIn++) |
407 | pOut[first] = *pIn * SF; | 390 | pOut[first] = fixmul16(*pIn, SF); |
408 | } else { | 391 | } else { |
409 | /* This subband was not coded, so zero the entire subband. */ | 392 | /* This subband was not coded, so zero the entire subband. */ |
410 | memset(pOut+first, 0, subbWidth*sizeof(float)); | 393 | memset(pOut+first, 0, subbWidth*sizeof(int32_t)); |
411 | } | 394 | } |
412 | } | 395 | } |
413 | 396 | ||
414 | /* Clear the subbands that were not coded. */ | 397 | /* Clear the subbands that were not coded. */ |
415 | first = subbandTab[cnt]; | 398 | first = subbandTab[cnt]; |
416 | memset(pOut+first, 0, (1024 - first) * sizeof(float)); | 399 | memset(pOut+first, 0, (1024 - first) * sizeof(int32_t)); |
417 | return numSubbands; | 400 | return numSubbands; |
418 | } | 401 | } |
419 | 402 | ||
@@ -431,8 +414,8 @@ static int decodeTonalComponents (GetBitContext *gb, tonal_component *pComponent | |||
431 | int components, coding_mode_selector, coding_mode, coded_values_per_component; | 414 | int components, coding_mode_selector, coding_mode, coded_values_per_component; |
432 | int sfIndx, coded_values, max_coded_values, quant_step_index, coded_components; | 415 | int sfIndx, coded_values, max_coded_values, quant_step_index, coded_components; |
433 | int band_flags[4], mantissa[8]; | 416 | int band_flags[4], mantissa[8]; |
434 | float *pCoef; | 417 | int32_t *pCoef; |
435 | float scalefactor; | 418 | int32_t scalefactor; |
436 | int component_count = 0; | 419 | int component_count = 0; |
437 | 420 | ||
438 | components = get_bits(gb,5); | 421 | components = get_bits(gb,5); |
@@ -473,7 +456,7 @@ static int decodeTonalComponents (GetBitContext *gb, tonal_component *pComponent | |||
473 | coded_values = coded_values_per_component + 1; | 456 | coded_values = coded_values_per_component + 1; |
474 | coded_values = FFMIN(max_coded_values,coded_values); | 457 | coded_values = FFMIN(max_coded_values,coded_values); |
475 | 458 | ||
476 | scalefactor = SFTable[sfIndx] * iMaxQuant[quant_step_index]; | 459 | scalefactor = fixmul31(SFTable_fixed[sfIndx], iMaxQuant_fix[quant_step_index]); |
477 | 460 | ||
478 | readQuantSpectralCoeffs(gb, quant_step_index, coding_mode, mantissa, coded_values); | 461 | readQuantSpectralCoeffs(gb, quant_step_index, coding_mode, mantissa, coded_values); |
479 | 462 | ||
@@ -482,7 +465,7 @@ static int decodeTonalComponents (GetBitContext *gb, tonal_component *pComponent | |||
482 | /* inverse quant */ | 465 | /* inverse quant */ |
483 | pCoef = pComponent[component_count].coef; | 466 | pCoef = pComponent[component_count].coef; |
484 | for (cnt = 0; cnt < coded_values; cnt++) | 467 | for (cnt = 0; cnt < coded_values; cnt++) |
485 | pCoef[cnt] = mantissa[cnt] * scalefactor; | 468 | pCoef[cnt] = fixmul16(mantissa[cnt], scalefactor); |
486 | 469 | ||
487 | component_count++; | 470 | component_count++; |
488 | } | 471 | } |
@@ -539,21 +522,21 @@ static int decodeGainControl (GetBitContext *gb, gain_block *pGb, int numBands) | |||
539 | * @param pGain2 next band gain info | 522 | * @param pGain2 next band gain info |
540 | */ | 523 | */ |
541 | 524 | ||
542 | static void gainCompensateAndOverlap (float *pIn, float *pPrev, float *pOut, gain_info *pGain1, gain_info *pGain2) | 525 | static void gainCompensateAndOverlap (int32_t *pIn, int32_t *pPrev, int32_t *pOut, gain_info *pGain1, gain_info *pGain2) |
543 | { | 526 | { |
544 | /* gain compensation function */ | 527 | /* gain compensation function */ |
545 | float gain1, gain2, gain_inc; | 528 | int32_t gain1, gain2, gain_inc; |
546 | int cnt, numdata, nsample, startLoc, endLoc; | 529 | int cnt, numdata, nsample, startLoc, endLoc; |
547 | 530 | ||
548 | 531 | ||
549 | if (pGain2->num_gain_data == 0) | 532 | if (pGain2->num_gain_data == 0) |
550 | gain1 = 1.0; | 533 | gain1 = ONE_16; |
551 | else | 534 | else |
552 | gain1 = gain_tab1[pGain2->levcode[0]]; | 535 | gain1 = gain_tab1[pGain2->levcode[0]]; |
553 | 536 | ||
554 | if (pGain1->num_gain_data == 0) { | 537 | if (pGain1->num_gain_data == 0) { |
555 | for (cnt = 0; cnt < 256; cnt++) | 538 | for (cnt = 0; cnt < 256; cnt++) |
556 | pOut[cnt] = pIn[cnt] * gain1 + pPrev[cnt]; | 539 | pOut[cnt] = fixmul16(pIn[cnt], gain1) + pPrev[cnt]; |
557 | } else { | 540 | } else { |
558 | numdata = pGain1->num_gain_data; | 541 | numdata = pGain1->num_gain_data; |
559 | pGain1->loccode[numdata] = 32; | 542 | pGain1->loccode[numdata] = 32; |
@@ -570,36 +553,38 @@ static void gainCompensateAndOverlap (float *pIn, float *pPrev, float *pOut, gai | |||
570 | 553 | ||
571 | /* interpolate */ | 554 | /* interpolate */ |
572 | for (; nsample < startLoc; nsample++) | 555 | for (; nsample < startLoc; nsample++) |
573 | pOut[nsample] = (pIn[nsample] * gain1 + pPrev[nsample]) * gain2; | 556 | pOut[nsample] = fixmul16((fixmul16(pIn[nsample], gain1) + pPrev[nsample]), gain2); |
574 | 557 | ||
575 | /* interpolation is done over eight samples */ | 558 | /* interpolation is done over eight samples */ |
576 | for (; nsample < endLoc; nsample++) { | 559 | for (; nsample < endLoc; nsample++) { |
577 | pOut[nsample] = (pIn[nsample] * gain1 + pPrev[nsample]) * gain2; | 560 | pOut[nsample] = fixmul16((fixmul16(pIn[nsample], gain1) + pPrev[nsample]),gain2); |
578 | gain2 *= gain_inc; | 561 | gain2 = fixmul16(gain2, gain_inc); |
579 | } | 562 | } |
580 | } | 563 | } |
581 | 564 | ||
582 | for (; nsample < 256; nsample++) | 565 | for (; nsample < 256; nsample++) |
583 | pOut[nsample] = (pIn[nsample] * gain1) + pPrev[nsample]; | 566 | pOut[nsample] = fixmul16(pIn[nsample], gain1) + pPrev[nsample]; |
584 | } | 567 | } |
585 | 568 | ||
586 | /* Delay for the overlapping part. */ | 569 | /* Delay for the overlapping part. */ |
587 | memcpy(pPrev, &pIn[256], 256*sizeof(float)); | 570 | memcpy(pPrev, &pIn[256], 256*sizeof(int32_t)); |
588 | } | 571 | } |
589 | 572 | ||
590 | /** | 573 | /** |
591 | * Combine the tonal band spectrum and regular band spectrum | 574 | * Combine the tonal band spectrum and regular band spectrum |
592 | * Return position of the last tonal coefficient | 575 | * Return position of the last tonal coefficient |
576 | |||
593 | * | 577 | * |
594 | * @param pSpectrum output spectrum buffer | 578 | * @param pSpectrum output spectrum buffer |
595 | * @param numComponents amount of tonal components | 579 | * @param numComponents amount of tonal components |
596 | * @param pComponent tonal components for this band | 580 | * @param pComponent tonal components for this band |
597 | */ | 581 | */ |
598 | 582 | ||
599 | static int addTonalComponents (float *pSpectrum, int numComponents, tonal_component *pComponent) | 583 | static int addTonalComponents (int32_t *pSpectrum, int numComponents, tonal_component *pComponent) |
600 | { | 584 | { |
601 | int cnt, i, lastPos = -1; | 585 | int cnt, i, lastPos = -1; |
602 | float *pIn, *pOut; | 586 | int32_t *pOut; |
587 | int32_t *pIn; | ||
603 | 588 | ||
604 | for (cnt = 0; cnt < numComponents; cnt++){ | 589 | for (cnt = 0; cnt < numComponents; cnt++){ |
605 | lastPos = FFMAX(pComponent[cnt].pos + pComponent[cnt].numCoefs, lastPos); | 590 | lastPos = FFMAX(pComponent[cnt].pos + pComponent[cnt].numCoefs, lastPos); |
@@ -614,13 +599,13 @@ static int addTonalComponents (float *pSpectrum, int numComponents, tonal_compon | |||
614 | } | 599 | } |
615 | 600 | ||
616 | 601 | ||
617 | #define INTERPOLATE(old,new,nsample) ((old) + (nsample)*0.125*((new)-(old))) | 602 | #define INTERPOLATE(old,new,nsample) ((old*ONE_16) + fixmul16(((nsample*ONE_16)>>3), (((new) - (old))*ONE_16))) |
618 | 603 | ||
619 | static void reverseMatrixing(float *su1, float *su2, int *pPrevCode, int *pCurrCode) | 604 | static void reverseMatrixing(int32_t *su1, int32_t *su2, int *pPrevCode, int *pCurrCode) |
620 | { | 605 | { |
621 | int i, band, nsample, s1, s2; | 606 | int i, band, nsample, s1, s2; |
622 | float c1, c2; | 607 | int32_t c1, c2; |
623 | float mc1_l, mc1_r, mc2_l, mc2_r; | 608 | int32_t mc1_l, mc1_r, mc2_l, mc2_r; |
624 | 609 | ||
625 | for (i=0,band = 0; band < 4*256; band+=256,i++) { | 610 | for (i=0,band = 0; band < 4*256; band+=256,i++) { |
626 | s1 = pPrevCode[i]; | 611 | s1 = pPrevCode[i]; |
@@ -629,18 +614,18 @@ static void reverseMatrixing(float *su1, float *su2, int *pPrevCode, int *pCurrC | |||
629 | 614 | ||
630 | if (s1 != s2) { | 615 | if (s1 != s2) { |
631 | /* Selector value changed, interpolation needed. */ | 616 | /* Selector value changed, interpolation needed. */ |
632 | mc1_l = matrixCoeffs[s1*2]; | 617 | mc1_l = matrixCoeffs_fix[s1<<1]; |
633 | mc1_r = matrixCoeffs[s1*2+1]; | 618 | mc1_r = matrixCoeffs_fix[(s1<<1)+1]; |
634 | mc2_l = matrixCoeffs[s2*2]; | 619 | mc2_l = matrixCoeffs_fix[s2<<1]; |
635 | mc2_r = matrixCoeffs[s2*2+1]; | 620 | mc2_r = matrixCoeffs_fix[(s2<<1)+1]; |
636 | 621 | ||
637 | /* Interpolation is done over the first eight samples. */ | 622 | /* Interpolation is done over the first eight samples. */ |
638 | for(; nsample < 8; nsample++) { | 623 | for(; nsample < 8; nsample++) { |
639 | c1 = su1[band+nsample]; | 624 | c1 = su1[band+nsample]; |
640 | c2 = su2[band+nsample]; | 625 | c2 = su2[band+nsample]; |
641 | c2 = c1 * INTERPOLATE(mc1_l,mc2_l,nsample) + c2 * INTERPOLATE(mc1_r,mc2_r,nsample); | 626 | c2 = fixmul16(c1, INTERPOLATE(mc1_l, mc2_l, nsample)) + fixmul16(c2, INTERPOLATE(mc1_r, mc2_r, nsample)); |
642 | su1[band+nsample] = c2; | 627 | su1[band+nsample] = c2; |
643 | su2[band+nsample] = c1 * 2.0 - c2; | 628 | su2[band+nsample] = (c1 << 1) - c2; |
644 | } | 629 | } |
645 | } | 630 | } |
646 | 631 | ||
@@ -650,8 +635,8 @@ static void reverseMatrixing(float *su1, float *su2, int *pPrevCode, int *pCurrC | |||
650 | for (; nsample < 256; nsample++) { | 635 | for (; nsample < 256; nsample++) { |
651 | c1 = su1[band+nsample]; | 636 | c1 = su1[band+nsample]; |
652 | c2 = su2[band+nsample]; | 637 | c2 = su2[band+nsample]; |
653 | su1[band+nsample] = c2 * 2.0; | 638 | su1[band+nsample] = c2 << 1; |
654 | su2[band+nsample] = (c1 - c2) * 2.0; | 639 | su2[band+nsample] = (c1 - c2) << 1; |
655 | } | 640 | } |
656 | break; | 641 | break; |
657 | 642 | ||
@@ -659,8 +644,8 @@ static void reverseMatrixing(float *su1, float *su2, int *pPrevCode, int *pCurrC | |||
659 | for (; nsample < 256; nsample++) { | 644 | for (; nsample < 256; nsample++) { |
660 | c1 = su1[band+nsample]; | 645 | c1 = su1[band+nsample]; |
661 | c2 = su2[band+nsample]; | 646 | c2 = su2[band+nsample]; |
662 | su1[band+nsample] = (c1 + c2) * 2.0; | 647 | su1[band+nsample] = (c1 + c2) << 1; |
663 | su2[band+nsample] = c2 * -2.0; | 648 | su2[band+nsample] = -1*(c2 << 1); |
664 | } | 649 | } |
665 | break; | 650 | break; |
666 | case 2: | 651 | case 2: |
@@ -678,24 +663,23 @@ static void reverseMatrixing(float *su1, float *su2, int *pPrevCode, int *pCurrC | |||
678 | } | 663 | } |
679 | } | 664 | } |
680 | 665 | ||
681 | static void getChannelWeights (int indx, int flag, float ch[2]){ | 666 | static void getChannelWeights (int indx, int flag, int32_t ch[2]){ |
682 | |||
683 | if (indx == 7) { | 667 | if (indx == 7) { |
684 | ch[0] = 1.0; | 668 | ch[0] = ONE_16; |
685 | ch[1] = 1.0; | 669 | ch[1] = ONE_16; |
686 | } else { | 670 | } else { |
687 | ch[0] = (float)(indx & 7) / 7.0; | 671 | ch[0] = fixdiv16(((indx & 7)*ONE_16), 7*ONE_16); |
688 | ch[1] = sqrt(2 - ch[0]*ch[0]); | 672 | ch[1] = fastSqrt((ONE_16 << 1) - fixmul16(ch[0], ch[0])); |
689 | if(flag) | 673 | if(flag) |
690 | FFSWAP(float, ch[0], ch[1]); | 674 | FFSWAP(int32_t, ch[0], ch[1]); |
691 | } | 675 | } |
692 | } | 676 | } |
693 | 677 | ||
694 | static void channelWeighting (float *su1, float *su2, int *p3) | 678 | static void channelWeighting (int32_t *su1, int32_t *su2, int *p3) |
695 | { | 679 | { |
696 | int band, nsample; | 680 | int band, nsample; |
697 | /* w[x][y] y=0 is left y=1 is right */ | 681 | /* w[x][y] y=0 is left y=1 is right */ |
698 | float w[2][2]; | 682 | int32_t w[2][2]; |
699 | 683 | ||
700 | if (p3[1] != 7 || p3[3] != 7){ | 684 | if (p3[1] != 7 || p3[3] != 7){ |
701 | getChannelWeights(p3[1], p3[0], w[0]); | 685 | getChannelWeights(p3[1], p3[0], w[0]); |
@@ -704,13 +688,13 @@ static void channelWeighting (float *su1, float *su2, int *p3) | |||
704 | for(band = 1; band < 4; band++) { | 688 | for(band = 1; band < 4; band++) { |
705 | /* scale the channels by the weights */ | 689 | /* scale the channels by the weights */ |
706 | for(nsample = 0; nsample < 8; nsample++) { | 690 | for(nsample = 0; nsample < 8; nsample++) { |
707 | su1[band*256+nsample] *= INTERPOLATE(w[0][0], w[0][1], nsample); | 691 | su1[band*256+nsample] = fixmul16(su1[band*256+nsample], INTERPOLATE(w[0][0], w[0][1], nsample)); |
708 | su2[band*256+nsample] *= INTERPOLATE(w[1][0], w[1][1], nsample); | 692 | su2[band*256+nsample] = fixmul16(su2[band*256+nsample], INTERPOLATE(w[1][0], w[1][1], nsample)); |
709 | } | 693 | } |
710 | 694 | ||
711 | for(; nsample < 256; nsample++) { | 695 | for(; nsample < 256; nsample++) { |
712 | su1[band*256+nsample] *= w[1][0]; | 696 | su1[band*256+nsample] = fixmul16(su1[band*256+nsample], w[1][0]); |
713 | su2[band*256+nsample] *= w[1][1]; | 697 | su2[band*256+nsample] = fixmul16(su2[band*256+nsample], w[1][1]); |
714 | } | 698 | } |
715 | } | 699 | } |
716 | } | 700 | } |
@@ -728,10 +712,9 @@ static void channelWeighting (float *su1, float *su2, int *p3) | |||
728 | */ | 712 | */ |
729 | 713 | ||
730 | 714 | ||
731 | static int decodeChannelSoundUnit (ATRAC3Context *q, GetBitContext *gb, channel_unit *pSnd, float *pOut, int channelNum, int codingMode) | 715 | static int decodeChannelSoundUnit (ATRAC3Context *q, GetBitContext *gb, channel_unit *pSnd, int32_t *pOut, int channelNum, int codingMode) |
732 | { | 716 | { |
733 | int band, result=0, numSubbands, lastTonal, numBands; | 717 | int band, result=0, numSubbands, lastTonal, numBands; |
734 | |||
735 | if (codingMode == JOINT_STEREO && channelNum == 1) { | 718 | if (codingMode == JOINT_STEREO && channelNum == 1) { |
736 | if (get_bits(gb,2) != 3) { | 719 | if (get_bits(gb,2) != 3) { |
737 | av_log(NULL,AV_LOG_ERROR,"JS mono Sound Unit id != 3.\n"); | 720 | av_log(NULL,AV_LOG_ERROR,"JS mono Sound Unit id != 3.\n"); |
@@ -771,7 +754,7 @@ static int decodeChannelSoundUnit (ATRAC3Context *q, GetBitContext *gb, channel_ | |||
771 | if (band <= numBands) { | 754 | if (band <= numBands) { |
772 | IMLT(&(pSnd->spectrum[band*256]), pSnd->IMDCT_buf, band&1); | 755 | IMLT(&(pSnd->spectrum[band*256]), pSnd->IMDCT_buf, band&1); |
773 | } else | 756 | } else |
774 | memset(pSnd->IMDCT_buf, 0, 512 * sizeof(float)); | 757 | memset(pSnd->IMDCT_buf, 0, 512 * sizeof(int32_t)); |
775 | 758 | ||
776 | /* gain compensation and overlapping */ | 759 | /* gain compensation and overlapping */ |
777 | gainCompensateAndOverlap (pSnd->IMDCT_buf, &(pSnd->prevFrame[band*256]), &(pOut[band*256]), | 760 | gainCompensateAndOverlap (pSnd->IMDCT_buf, &(pSnd->prevFrame[band*256]), &(pOut[band*256]), |
@@ -795,7 +778,7 @@ static int decodeChannelSoundUnit (ATRAC3Context *q, GetBitContext *gb, channel_ | |||
795 | static int decodeFrame(ATRAC3Context *q, const uint8_t* databuf) | 778 | static int decodeFrame(ATRAC3Context *q, const uint8_t* databuf) |
796 | { | 779 | { |
797 | int result, i; | 780 | int result, i; |
798 | float *p1, *p2, *p3, *p4; | 781 | int32_t *p1, *p2, *p3, *p4; |
799 | uint8_t *ptr1; | 782 | uint8_t *ptr1; |
800 | 783 | ||
801 | if (q->codingMode == JOINT_STEREO) { | 784 | if (q->codingMode == JOINT_STEREO) { |
@@ -893,7 +876,6 @@ static int decodeFrame(ATRAC3Context *q, const uint8_t* databuf) | |||
893 | static int atrac3_decode_frame(RMContext *rmctx, ATRAC3Context *q, | 876 | static int atrac3_decode_frame(RMContext *rmctx, ATRAC3Context *q, |
894 | void *data, int *data_size, | 877 | void *data, int *data_size, |
895 | const uint8_t *buf, int buf_size) { | 878 | const uint8_t *buf, int buf_size) { |
896 | //ATRAC3Context *q = rmctx->priv_data; | ||
897 | int result = 0, i; | 879 | int result = 0, i; |
898 | const uint8_t* databuf; | 880 | const uint8_t* databuf; |
899 | int16_t* samples = data; | 881 | int16_t* samples = data; |
@@ -919,13 +901,13 @@ static int atrac3_decode_frame(RMContext *rmctx, ATRAC3Context *q, | |||
919 | if (q->channels == 1) { | 901 | if (q->channels == 1) { |
920 | /* mono */ | 902 | /* mono */ |
921 | for (i = 0; i<1024; i++) | 903 | for (i = 0; i<1024; i++) |
922 | samples[i] = av_clip_int16(round(q->outSamples[i])); | 904 | samples[i] = av_clip_int16(q->outSamples[i]); |
923 | *data_size = 1024 * sizeof(int16_t); | 905 | *data_size = 1024 * sizeof(int16_t); |
924 | } else { | 906 | } else { |
925 | /* stereo */ | 907 | /* stereo */ |
926 | for (i = 0; i < 1024; i++) { | 908 | for (i = 0; i < 1024; i++) { |
927 | samples[i*2] = av_clip_int16(round(q->outSamples[i])); | 909 | samples[i*2] = av_clip_int16(q->outSamples[i]); |
928 | samples[i*2+1] = av_clip_int16(round(q->outSamples[1024+i])); | 910 | samples[i*2+1] = av_clip_int16(q->outSamples[1024+i]); |
929 | } | 911 | } |
930 | *data_size = 2048 * sizeof(int16_t); | 912 | *data_size = 2048 * sizeof(int16_t); |
931 | } | 913 | } |
@@ -944,7 +926,6 @@ static av_cold int atrac3_decode_init(ATRAC3Context *q, RMContext *rmctx) | |||
944 | { | 926 | { |
945 | int i; | 927 | int i; |
946 | const uint8_t *edata_ptr = rmctx->codec_extradata; | 928 | const uint8_t *edata_ptr = rmctx->codec_extradata; |
947 | //ATRAC3Context *q = rmctx->priv_data; | ||
948 | static VLC_TYPE atrac3_vlc_table[4096][2]; | 929 | static VLC_TYPE atrac3_vlc_table[4096][2]; |
949 | static int vlcs_initialized = 0; | 930 | static int vlcs_initialized = 0; |
950 | 931 | ||
@@ -1051,17 +1032,6 @@ static av_cold int atrac3_decode_init(ATRAC3Context *q, RMContext *rmctx) | |||
1051 | 1032 | ||
1052 | init_atrac3_transforms(q); | 1033 | init_atrac3_transforms(q); |
1053 | 1034 | ||
1054 | /* Generate the scale factors. */ | ||
1055 | for (i=0 ; i<64 ; i++) | ||
1056 | SFTable[i] = pow(2.0, (i - 15) / 3.0); | ||
1057 | |||
1058 | /* Generate gain tables. */ | ||
1059 | for (i=0 ; i<16 ; i++) | ||
1060 | gain_tab1[i] = powf (2.0, (4 - i)); | ||
1061 | |||
1062 | for (i=-15 ; i<16 ; i++) | ||
1063 | gain_tab2[i+15] = powf (2.0, i * -0.125); | ||
1064 | |||
1065 | /* init the joint-stereo decoding data */ | 1035 | /* init the joint-stereo decoding data */ |
1066 | q->weighting_delay[0] = 0; | 1036 | q->weighting_delay[0] = 0; |
1067 | q->weighting_delay[1] = 7; | 1037 | q->weighting_delay[1] = 7; |
@@ -1076,8 +1046,6 @@ static av_cold int atrac3_decode_init(ATRAC3Context *q, RMContext *rmctx) | |||
1076 | q->matrix_coeff_index_next[i] = 3; | 1046 | q->matrix_coeff_index_next[i] = 3; |
1077 | } | 1047 | } |
1078 | 1048 | ||
1079 | dsputil_init(&dsp); | ||
1080 | |||
1081 | q->pUnits = av_mallocz(sizeof(channel_unit)*q->channels); | 1049 | q->pUnits = av_mallocz(sizeof(channel_unit)*q->channels); |
1082 | if (!q->pUnits) { | 1050 | if (!q->pUnits) { |
1083 | av_free(q->decoded_bytes_buffer); | 1051 | av_free(q->decoded_bytes_buffer); |
diff --git a/apps/codecs/libatrac/atrac3data_fixed.h b/apps/codecs/libatrac/atrac3data_fixed.h new file mode 100644 index 0000000000..8dbc952c2f --- /dev/null +++ b/apps/codecs/libatrac/atrac3data_fixed.h | |||
@@ -0,0 +1,145 @@ | |||
1 | |||
2 | /* tables for the scalefactor decoding */ | ||
3 | /* scaled by 2^31*/ | ||
4 | static const int32_t iMaxQuant_fix[8] = { | ||
5 | 0x0, 0x55555580, 0x33333340, 0x24924940, 0x1c71c720, 0x11111120, 0x8421080, | ||
6 | 0x4104108 | ||
7 | }; | ||
8 | |||
9 | /* scaled by 2^16 */ | ||
10 | static const int32_t SFTable_fixed[64] = { | ||
11 | 0x00000800, 0x00000a14, 0x00000cb3, 0x00001000, 0x00001429, 0x00001966, | ||
12 | 0x00002000, 0x00002851, 0x000032cc, 0x00004000, 0x000050a3, 0x00006598, | ||
13 | 0x00008000, 0x0000a145, 0x0000cb30, 0x00010000, 0x0001428a, 0x00019660, | ||
14 | 0x00020000, 0x00028514, 0x00032cc0, 0x00040000, 0x00050a29, 0x00065980, | ||
15 | 0x00080000, 0x000a1452, 0x000cb2ff, 0x00100000, 0x001428a3, 0x001965ff, | ||
16 | 0x00200000, 0x00285146, 0x0032cbfd, 0x00400000, 0x0050a28c, 0x006597fb, | ||
17 | 0x00800000, 0x00a14518, 0x00cb2ff5, 0x01000000, 0x01428a30, 0x01965fea, | ||
18 | 0x02000000, 0x02851460, 0x032cbfd4, 0x04000000, 0x050a28c0, 0x06597fa8, | ||
19 | 0x08000000, 0x0a145180, 0x0cb2ff50, 0x10000000, 0x1428a300, 0x1965fea0, | ||
20 | 0x20000000, 0x28514600, 0x32cbfd40, 0x40000000, 0x50a28c00, 0x6597fa80, | ||
21 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
22 | }; | ||
23 | |||
24 | /* transform data */ | ||
25 | /* floating point values scaled by 2^31 */ | ||
26 | static const int32_t qmf_48tap_half_fix[24] = { | ||
27 | 0xffff855e, 0xfffcfbca, 0xfffe28eb, 0x9de6b, 0x7f028, 0xffe40d08, | ||
28 | 0xffeef140, 0x42a692, 0x19ab1f, 0xff75dec7, 0xffe738f5, 0x100e928, | ||
29 | 0xfffdfedf, 0xfe478b84, 0x50b279, 0x2c83f88, 0xff005ad7, 0xfba2ee80, | ||
30 | 0x2685970, 0x6f42798, 0xfa6b6f10, 0xf3475f80, 0x10e7f7c0, 0x3b6c44c0 | ||
31 | }; | ||
32 | |||
33 | /* mdct window scaled by 2^31 */ | ||
34 | static const int32_t window_lookup[512] = { | ||
35 | 0xffffb10c, 0xfffd394b, 0xfff8494f, 0xfff0e025, 0xffe6fc5f, 0xffda9c15, | ||
36 | 0xffcbbce6, 0xffba5bf4, 0xffa675e8, 0xff9006f0, 0xff770aba, 0xff5b7c7e, | ||
37 | 0xff3d56f2, 0xff1c9452, 0xfef92e59, 0xfed31e45, 0xfeaa5cd5, 0xfe7ee247, | ||
38 | 0xfe50a657, 0xfe1fa041, 0xfdebc6c1, 0xfdb5100d, 0xfd7b71d5, 0xfd3ee149, | ||
39 | 0xfcff5311, 0xfcbcbb49, 0xfc770d99, 0xfc2e3d15, 0xfbe23c39, 0xfb92fd29, | ||
40 | 0xfb407141, 0xfaea8989, 0xfa913661, 0xfa3467b1, 0xf9d40cd9, 0xf9701499, | ||
41 | 0xf9086d41, 0xf89d04a9, 0xf82dc7f1, 0xf7baa3e1, 0xf74384b1, 0xf6c85611, | ||
42 | 0xf6490321, 0xf5c576b1, 0xf53d9b21, 0xf4b15a01, 0xf4209ce1, 0xf38b4c71, | ||
43 | 0xf2f15171, 0xf2529411, 0xf1aefbf1, 0xf10670a1, 0xf058d941, 0xefa61cc1, | ||
44 | 0xeeee21c1, 0xee30cec1, 0xed6e0a41, 0xeca5ba61, 0xebd7c5c1, 0xeb041241, | ||
45 | 0xea2a8601, 0xe94b0861, 0xe8657f61, 0xe779d241, 0xe687e861, 0xe58fa9e1, | ||
46 | 0xe490fec1, 0xe38bd101, 0xe28009c1, 0xe16d93e1, 0xe0545ba1, 0xdf344dc1, | ||
47 | 0xde0d5881, 0xdcdf6bc1, 0xdbaa7801, 0xda6e70c1, 0xd92b4ac1, 0xd7e0fc81, | ||
48 | 0xd68f7ec1, 0xd536cd41, 0xd3d6e5c1, 0xd26fc901, 0xd10179c1, 0xcf8bff41, | ||
49 | 0xce0f6301, 0xcc8bb241, 0xcb00fdc1, 0xc96f5b01, 0xc7d6e141, 0xc637af41, | ||
50 | 0xc491e4c1, 0xc2e5a801, 0xc1332401, 0xbf7a8701, 0xbdbc0681, 0xbbf7da01, | ||
51 | 0xba2e4181, 0xb85f7f81, 0xb68bde01, 0xb4b3a981, 0xb2d73781, 0xb0f6df01, | ||
52 | 0xaf12ff01, 0xad2bfa81, 0xab423981, 0xa9562981, 0xa7683c01, 0xa578e701, | ||
53 | 0xa388a681, 0xa197f801, 0x9fa75e81, 0x9db75f01, 0x9bc88201, 0x99db5301, | ||
54 | 0x97f06001, 0x96083601, 0x94236601, 0x92427f81, 0x90661481, 0x8e8eb481, | ||
55 | 0x8cbced01, 0x8af14d81, 0x892c5f81, 0x876eab01, 0x85b8b681, 0x840b0301, | ||
56 | 0x82660c01, 0x80ca4a01, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
57 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
58 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
59 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
60 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
61 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
62 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
63 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
64 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
65 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
66 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
67 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
68 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
69 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
70 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
71 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
72 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
73 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
74 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
75 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
76 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
77 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
78 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
79 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
80 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
81 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
82 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
83 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
84 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
85 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
86 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
87 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
88 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
89 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
90 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
91 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
92 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
93 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
94 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
95 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
96 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
97 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
98 | 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, | ||
99 | 0x80ca4a01, 0x82660c01, 0x840b0301, 0x85b8b681, 0x876eab01, 0x892c5f81, | ||
100 | 0x8af14d81, 0x8cbced01, 0x8e8eb481, 0x90661481, 0x92427f81, 0x94236601, | ||
101 | 0x96083601, 0x97f06001, 0x99db5301, 0x9bc88201, 0x9db75f01, 0x9fa75e81, | ||
102 | 0xa197f801, 0xa388a681, 0xa578e701, 0xa7683c01, 0xa9562981, 0xab423981, | ||
103 | 0xad2bfa81, 0xaf12ff01, 0xb0f6df01, 0xb2d73781, 0xb4b3a981, 0xb68bde01, | ||
104 | 0xb85f7f81, 0xba2e4181, 0xbbf7da01, 0xbdbc0681, 0xbf7a8701, 0xc1332401, | ||
105 | 0xc2e5a801, 0xc491e4c1, 0xc637af41, 0xc7d6e141, 0xc96f5b01, 0xcb00fdc1, | ||
106 | 0xcc8bb241, 0xce0f6301, 0xcf8bff41, 0xd10179c1, 0xd26fc901, 0xd3d6e5c1, | ||
107 | 0xd536cd41, 0xd68f7ec1, 0xd7e0fc81, 0xd92b4ac1, 0xda6e70c1, 0xdbaa7801, | ||
108 | 0xdcdf6bc1, 0xde0d5881, 0xdf344dc1, 0xe0545ba1, 0xe16d93e1, 0xe28009c1, | ||
109 | 0xe38bd101, 0xe490fec1, 0xe58fa9e1, 0xe687e861, 0xe779d241, 0xe8657f61, | ||
110 | 0xe94b0861, 0xea2a8601, 0xeb041241, 0xebd7c5c1, 0xeca5ba61, 0xed6e0a41, | ||
111 | 0xee30cec1, 0xeeee21c1, 0xefa61cc1, 0xf058d941, 0xf10670a1, 0xf1aefbf1, | ||
112 | 0xf2529411, 0xf2f15171, 0xf38b4c71, 0xf4209ce1, 0xf4b15a01, 0xf53d9b21, | ||
113 | 0xf5c576b1, 0xf6490321, 0xf6c85611, 0xf74384b1, 0xf7baa3e1, 0xf82dc7f1, | ||
114 | 0xf89d04a9, 0xf9086d41, 0xf9701499, 0xf9d40cd9, 0xfa3467b1, 0xfa913661, | ||
115 | 0xfaea8989, 0xfb407141, 0xfb92fd29, 0xfbe23c39, 0xfc2e3d15, 0xfc770d99, | ||
116 | 0xfcbcbb49, 0xfcff5311, 0xfd3ee149, 0xfd7b71d5, 0xfdb5100d, 0xfdebc6c1, | ||
117 | 0xfe1fa041, 0xfe50a657, 0xfe7ee247, 0xfeaa5cd5, 0xfed31e45, 0xfef92e59, | ||
118 | 0xff1c9452, 0xff3d56f2, 0xff5b7c7e, 0xff770aba, 0xff9006f0, 0xffa675e8, | ||
119 | 0xffba5bf4, 0xffcbbce6, 0xffda9c15, 0xffe6fc5f, 0xfff0e025, 0xfff8494f, | ||
120 | 0xfffd394b, 0xffffb10c, | ||
121 | }; | ||
122 | |||
123 | /* Gain tables scaled by 2^16 */ | ||
124 | static const int32_t gain_tab1[16] = { | ||
125 | 0x00100000, 0x00080000, 0x00040000, 0x00020000, 0x00010000, 0x00008000, | ||
126 | 0x00004000, 0x00002000, 0x00001000, 0x00000800, 0x00000400, 0x00000200, | ||
127 | 0x00000100, 0x00000080, 0x00000040, 0x00000020, | ||
128 | }; | ||
129 | |||
130 | static const int32_t gain_tab2[31] = { | ||
131 | 0x0003ab03, 0x00035d14, 0x0003159d, 0x0002d414, 0x000297fb, 0x000260e0, | ||
132 | 0x00022e57, 0x00020000, 0x0001d582, 0x0001ae8a, 0x00018ace, 0x00016a0a, | ||
133 | 0x00014bfe, 0x00013070, 0x0001172c, 0x00010000, 0x0000eac1, 0x0000d745, | ||
134 | 0x0000c567, 0x0000b505, 0x0000a5ff, 0x00009838, 0x00008b96, 0x00008000, | ||
135 | 0x00007560, 0x00006ba2, 0x000062b4, 0x00005a82, 0x000052ff, 0x00004c1c, | ||
136 | 0x000045cb, | ||
137 | |||
138 | }; | ||
139 | |||
140 | /* Joint-Stereo related tables, scaled by 2^16 */ | ||
141 | static const int32_t matrixCoeffs_fix[8] = { | ||
142 | 0x00000000, 0x00020000, 0x00020000, 0x00020000, | ||
143 | 0x00000000, 0x00000000, 0x00010000, 0x00010000, | ||
144 | }; | ||
145 | |||
diff --git a/apps/codecs/libatrac/dsputil.c b/apps/codecs/libatrac/dsputil.c deleted file mode 100644 index 412a934862..0000000000 --- a/apps/codecs/libatrac/dsputil.c +++ /dev/null | |||
@@ -1,4114 +0,0 @@ | |||
1 | /* | ||
2 | * DSP utils | ||
3 | * Copyright (c) 2000, 2001 Fabrice Bellard | ||
4 | * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | ||
5 | * | ||
6 | * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> | ||
7 | * | ||
8 | * This file is part of FFmpeg. | ||
9 | * | ||
10 | * FFmpeg is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU Lesser General Public | ||
12 | * License as published by the Free Software Foundation; either | ||
13 | * version 2.1 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * FFmpeg is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * Lesser General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU Lesser General Public | ||
21 | * License along with FFmpeg; if not, write to the Free Software | ||
22 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
23 | */ | ||
24 | |||
25 | /** | ||
26 | * @file libavcodec/dsputil.c | ||
27 | * DSP utils | ||
28 | */ | ||
29 | |||
30 | #include "avcodec.h" | ||
31 | #include "dsputil.h" | ||
32 | /*#include "simple_idct.h" | ||
33 | #include "faandct.h" | ||
34 | #include "faanidct.h" | ||
35 | #include "mathops.h" | ||
36 | #include "h263.h" | ||
37 | #include "snow.h" */ | ||
38 | |||
39 | /* snow.c */ | ||
40 | void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count); | ||
41 | |||
42 | /* vorbis.c */ | ||
43 | void vorbis_inverse_coupling(float *mag, float *ang, int blocksize); | ||
44 | |||
45 | /* ac3dec.c */ | ||
46 | void ff_ac3_downmix_c(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len); | ||
47 | |||
48 | /* flacenc.c */ | ||
49 | void ff_flac_compute_autocorr(const int32_t *data, int len, int lag, double *autoc); | ||
50 | |||
51 | /* pngdec.c */ | ||
52 | void ff_add_png_paeth_prediction(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp); | ||
53 | |||
54 | /* eaidct.c */ | ||
55 | void ff_ea_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block); | ||
56 | |||
57 | uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, }; | ||
58 | uint32_t ff_squareTbl[512] = {0, }; | ||
59 | |||
60 | // 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size | ||
61 | #define pb_7f (~0UL/255 * 0x7f) | ||
62 | #define pb_80 (~0UL/255 * 0x80) | ||
63 | |||
64 | const uint8_t ff_zigzag_direct[64] = { | ||
65 | 0, 1, 8, 16, 9, 2, 3, 10, | ||
66 | 17, 24, 32, 25, 18, 11, 4, 5, | ||
67 | 12, 19, 26, 33, 40, 48, 41, 34, | ||
68 | 27, 20, 13, 6, 7, 14, 21, 28, | ||
69 | 35, 42, 49, 56, 57, 50, 43, 36, | ||
70 | 29, 22, 15, 23, 30, 37, 44, 51, | ||
71 | 58, 59, 52, 45, 38, 31, 39, 46, | ||
72 | 53, 60, 61, 54, 47, 55, 62, 63 | ||
73 | }; | ||
74 | |||
75 | /* Specific zigzag scan for 248 idct. NOTE that unlike the | ||
76 | specification, we interleave the fields */ | ||
77 | const uint8_t ff_zigzag248_direct[64] = { | ||
78 | 0, 8, 1, 9, 16, 24, 2, 10, | ||
79 | 17, 25, 32, 40, 48, 56, 33, 41, | ||
80 | 18, 26, 3, 11, 4, 12, 19, 27, | ||
81 | 34, 42, 49, 57, 50, 58, 35, 43, | ||
82 | 20, 28, 5, 13, 6, 14, 21, 29, | ||
83 | 36, 44, 51, 59, 52, 60, 37, 45, | ||
84 | 22, 30, 7, 15, 23, 31, 38, 46, | ||
85 | 53, 61, 54, 62, 39, 47, 55, 63, | ||
86 | }; | ||
87 | |||
88 | /* not permutated inverse zigzag_direct + 1 for MMX quantizer */ | ||
89 | DECLARE_ALIGNED_8(uint16_t, inv_zigzag_direct16[64]) = {0, }; | ||
90 | |||
91 | const uint8_t ff_alternate_horizontal_scan[64] = { | ||
92 | 0, 1, 2, 3, 8, 9, 16, 17, | ||
93 | 10, 11, 4, 5, 6, 7, 15, 14, | ||
94 | 13, 12, 19, 18, 24, 25, 32, 33, | ||
95 | 26, 27, 20, 21, 22, 23, 28, 29, | ||
96 | 30, 31, 34, 35, 40, 41, 48, 49, | ||
97 | 42, 43, 36, 37, 38, 39, 44, 45, | ||
98 | 46, 47, 50, 51, 56, 57, 58, 59, | ||
99 | 52, 53, 54, 55, 60, 61, 62, 63, | ||
100 | }; | ||
101 | |||
102 | const uint8_t ff_alternate_vertical_scan[64] = { | ||
103 | 0, 8, 16, 24, 1, 9, 2, 10, | ||
104 | 17, 25, 32, 40, 48, 56, 57, 49, | ||
105 | 41, 33, 26, 18, 3, 11, 4, 12, | ||
106 | 19, 27, 34, 42, 50, 58, 35, 43, | ||
107 | 51, 59, 20, 28, 5, 13, 6, 14, | ||
108 | 21, 29, 36, 44, 52, 60, 37, 45, | ||
109 | 53, 61, 22, 30, 7, 15, 23, 31, | ||
110 | 38, 46, 54, 62, 39, 47, 55, 63, | ||
111 | }; | ||
112 | |||
113 | /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */ | ||
114 | const uint32_t ff_inverse[256]={ | ||
115 | 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757, | ||
116 | 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154, | ||
117 | 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709, | ||
118 | 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333, | ||
119 | 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367, | ||
120 | 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283, | ||
121 | 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315, | ||
122 | 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085, | ||
123 | 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498, | ||
124 | 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675, | ||
125 | 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441, | ||
126 | 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183, | ||
127 | 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712, | ||
128 | 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400, | ||
129 | 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163, | ||
130 | 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641, | ||
131 | 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573, | ||
132 | 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737, | ||
133 | 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493, | ||
134 | 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373, | ||
135 | 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368, | ||
136 | 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671, | ||
137 | 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767, | ||
138 | 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740, | ||
139 | 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751, | ||
140 | 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635, | ||
141 | 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593, | ||
142 | 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944, | ||
143 | 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933, | ||
144 | 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575, | ||
145 | 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532, | ||
146 | 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010, | ||
147 | }; | ||
148 | |||
149 | /* Input permutation for the simple_idct_mmx */ | ||
150 | static const uint8_t simple_mmx_permutation[64]={ | ||
151 | 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, | ||
152 | 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, | ||
153 | 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, | ||
154 | 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, | ||
155 | 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, | ||
156 | 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, | ||
157 | 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, | ||
158 | 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, | ||
159 | }; | ||
160 | |||
161 | static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7}; | ||
162 | |||
163 | void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){ | ||
164 | int i; | ||
165 | int end; | ||
166 | |||
167 | st->scantable= src_scantable; | ||
168 | |||
169 | for(i=0; i<64; i++){ | ||
170 | int j; | ||
171 | j = src_scantable[i]; | ||
172 | st->permutated[i] = permutation[j]; | ||
173 | #if ARCH_PPC | ||
174 | st->inverse[j] = i; | ||
175 | #endif | ||
176 | } | ||
177 | |||
178 | end=-1; | ||
179 | for(i=0; i<64; i++){ | ||
180 | int j; | ||
181 | j = st->permutated[i]; | ||
182 | if(j>end) end=j; | ||
183 | st->raster_end[i]= end; | ||
184 | } | ||
185 | } | ||
186 | |||
187 | #if CONFIG_SNOW_ENCODER //dwt is in snow.c | ||
188 | static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){ | ||
189 | int s, i, j; | ||
190 | const int dec_count= w==8 ? 3 : 4; | ||
191 | int tmp[32*32]; | ||
192 | int level, ori; | ||
193 | static const int scale[2][2][4][4]={ | ||
194 | { | ||
195 | { | ||
196 | // 9/7 8x8 dec=3 | ||
197 | {268, 239, 239, 213}, | ||
198 | { 0, 224, 224, 152}, | ||
199 | { 0, 135, 135, 110}, | ||
200 | },{ | ||
201 | // 9/7 16x16 or 32x32 dec=4 | ||
202 | {344, 310, 310, 280}, | ||
203 | { 0, 320, 320, 228}, | ||
204 | { 0, 175, 175, 136}, | ||
205 | { 0, 129, 129, 102}, | ||
206 | } | ||
207 | },{ | ||
208 | { | ||
209 | // 5/3 8x8 dec=3 | ||
210 | {275, 245, 245, 218}, | ||
211 | { 0, 230, 230, 156}, | ||
212 | { 0, 138, 138, 113}, | ||
213 | },{ | ||
214 | // 5/3 16x16 or 32x32 dec=4 | ||
215 | {352, 317, 317, 286}, | ||
216 | { 0, 328, 328, 233}, | ||
217 | { 0, 180, 180, 140}, | ||
218 | { 0, 132, 132, 105}, | ||
219 | } | ||
220 | } | ||
221 | }; | ||
222 | |||
223 | for (i = 0; i < h; i++) { | ||
224 | for (j = 0; j < w; j+=4) { | ||
225 | tmp[32*i+j+0] = (pix1[j+0] - pix2[j+0])<<4; | ||
226 | tmp[32*i+j+1] = (pix1[j+1] - pix2[j+1])<<4; | ||
227 | tmp[32*i+j+2] = (pix1[j+2] - pix2[j+2])<<4; | ||
228 | tmp[32*i+j+3] = (pix1[j+3] - pix2[j+3])<<4; | ||
229 | } | ||
230 | pix1 += line_size; | ||
231 | pix2 += line_size; | ||
232 | } | ||
233 | |||
234 | ff_spatial_dwt(tmp, w, h, 32, type, dec_count); | ||
235 | |||
236 | s=0; | ||
237 | assert(w==h); | ||
238 | for(level=0; level<dec_count; level++){ | ||
239 | for(ori= level ? 1 : 0; ori<4; ori++){ | ||
240 | int size= w>>(dec_count-level); | ||
241 | int sx= (ori&1) ? size : 0; | ||
242 | int stride= 32<<(dec_count-level); | ||
243 | int sy= (ori&2) ? stride>>1 : 0; | ||
244 | |||
245 | for(i=0; i<size; i++){ | ||
246 | for(j=0; j<size; j++){ | ||
247 | int v= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori]; | ||
248 | s += FFABS(v); | ||
249 | } | ||
250 | } | ||
251 | } | ||
252 | } | ||
253 | assert(s>=0); | ||
254 | return s>>9; | ||
255 | } | ||
256 | |||
257 | static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ | ||
258 | return w_c(v, pix1, pix2, line_size, 8, h, 1); | ||
259 | } | ||
260 | |||
261 | static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ | ||
262 | return w_c(v, pix1, pix2, line_size, 8, h, 0); | ||
263 | } | ||
264 | |||
265 | static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ | ||
266 | return w_c(v, pix1, pix2, line_size, 16, h, 1); | ||
267 | } | ||
268 | |||
269 | static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ | ||
270 | return w_c(v, pix1, pix2, line_size, 16, h, 0); | ||
271 | } | ||
272 | |||
273 | int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ | ||
274 | return w_c(v, pix1, pix2, line_size, 32, h, 1); | ||
275 | } | ||
276 | |||
277 | int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ | ||
278 | return w_c(v, pix1, pix2, line_size, 32, h, 0); | ||
279 | } | ||
280 | #endif | ||
281 | |||
282 | /** | ||
283 | * Copies a rectangular area of samples to a temporary buffer and replicates the boarder samples. | ||
284 | * @param buf destination buffer | ||
285 | * @param src source buffer | ||
286 | * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers | ||
287 | * @param block_w width of block | ||
288 | * @param block_h height of block | ||
289 | * @param src_x x coordinate of the top left sample of the block in the source buffer | ||
290 | * @param src_y y coordinate of the top left sample of the block in the source buffer | ||
291 | * @param w width of the source buffer | ||
292 | * @param h height of the source buffer | ||
293 | */ | ||
294 | void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h, | ||
295 | int src_x, int src_y, int w, int h){ | ||
296 | int x, y; | ||
297 | int start_y, start_x, end_y, end_x; | ||
298 | |||
299 | if(src_y>= h){ | ||
300 | src+= (h-1-src_y)*linesize; | ||
301 | src_y=h-1; | ||
302 | }else if(src_y<=-block_h){ | ||
303 | src+= (1-block_h-src_y)*linesize; | ||
304 | src_y=1-block_h; | ||
305 | } | ||
306 | if(src_x>= w){ | ||
307 | src+= (w-1-src_x); | ||
308 | src_x=w-1; | ||
309 | }else if(src_x<=-block_w){ | ||
310 | src+= (1-block_w-src_x); | ||
311 | src_x=1-block_w; | ||
312 | } | ||
313 | |||
314 | start_y= FFMAX(0, -src_y); | ||
315 | start_x= FFMAX(0, -src_x); | ||
316 | end_y= FFMIN(block_h, h-src_y); | ||
317 | end_x= FFMIN(block_w, w-src_x); | ||
318 | |||
319 | // copy existing part | ||
320 | for(y=start_y; y<end_y; y++){ | ||
321 | for(x=start_x; x<end_x; x++){ | ||
322 | buf[x + y*linesize]= src[x + y*linesize]; | ||
323 | } | ||
324 | } | ||
325 | |||
326 | //top | ||
327 | for(y=0; y<start_y; y++){ | ||
328 | for(x=start_x; x<end_x; x++){ | ||
329 | buf[x + y*linesize]= buf[x + start_y*linesize]; | ||
330 | } | ||
331 | } | ||
332 | |||
333 | //bottom | ||
334 | for(y=end_y; y<block_h; y++){ | ||
335 | for(x=start_x; x<end_x; x++){ | ||
336 | buf[x + y*linesize]= buf[x + (end_y-1)*linesize]; | ||
337 | } | ||
338 | } | ||
339 | |||
340 | for(y=0; y<block_h; y++){ | ||
341 | //left | ||
342 | for(x=0; x<start_x; x++){ | ||
343 | buf[x + y*linesize]= buf[start_x + y*linesize]; | ||
344 | } | ||
345 | |||
346 | //right | ||
347 | for(x=end_x; x<block_w; x++){ | ||
348 | buf[x + y*linesize]= buf[end_x - 1 + y*linesize]; | ||
349 | } | ||
350 | } | ||
351 | } | ||
352 | |||
353 | #if 0 | ||
354 | static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size) | ||
355 | { | ||
356 | int i; | ||
357 | |||
358 | /* read the pixels */ | ||
359 | for(i=0;i<8;i++) { | ||
360 | block[0] = pixels[0]; | ||
361 | block[1] = pixels[1]; | ||
362 | block[2] = pixels[2]; | ||
363 | block[3] = pixels[3]; | ||
364 | block[4] = pixels[4]; | ||
365 | block[5] = pixels[5]; | ||
366 | block[6] = pixels[6]; | ||
367 | block[7] = pixels[7]; | ||
368 | pixels += line_size; | ||
369 | block += 8; | ||
370 | } | ||
371 | } | ||
372 | |||
373 | static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1, | ||
374 | const uint8_t *s2, int stride){ | ||
375 | int i; | ||
376 | |||
377 | /* read the pixels */ | ||
378 | for(i=0;i<8;i++) { | ||
379 | block[0] = s1[0] - s2[0]; | ||
380 | block[1] = s1[1] - s2[1]; | ||
381 | block[2] = s1[2] - s2[2]; | ||
382 | block[3] = s1[3] - s2[3]; | ||
383 | block[4] = s1[4] - s2[4]; | ||
384 | block[5] = s1[5] - s2[5]; | ||
385 | block[6] = s1[6] - s2[6]; | ||
386 | block[7] = s1[7] - s2[7]; | ||
387 | s1 += stride; | ||
388 | s2 += stride; | ||
389 | block += 8; | ||
390 | } | ||
391 | } | ||
392 | |||
393 | |||
394 | static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, | ||
395 | int line_size) | ||
396 | { | ||
397 | int i; | ||
398 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | ||
399 | |||
400 | /* read the pixels */ | ||
401 | for(i=0;i<8;i++) { | ||
402 | pixels[0] = cm[block[0]]; | ||
403 | pixels[1] = cm[block[1]]; | ||
404 | pixels[2] = cm[block[2]]; | ||
405 | pixels[3] = cm[block[3]]; | ||
406 | pixels[4] = cm[block[4]]; | ||
407 | pixels[5] = cm[block[5]]; | ||
408 | pixels[6] = cm[block[6]]; | ||
409 | pixels[7] = cm[block[7]]; | ||
410 | |||
411 | pixels += line_size; | ||
412 | block += 8; | ||
413 | } | ||
414 | } | ||
415 | |||
416 | static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, | ||
417 | int line_size) | ||
418 | { | ||
419 | int i; | ||
420 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | ||
421 | |||
422 | /* read the pixels */ | ||
423 | for(i=0;i<4;i++) { | ||
424 | pixels[0] = cm[block[0]]; | ||
425 | pixels[1] = cm[block[1]]; | ||
426 | pixels[2] = cm[block[2]]; | ||
427 | pixels[3] = cm[block[3]]; | ||
428 | |||
429 | pixels += line_size; | ||
430 | block += 8; | ||
431 | } | ||
432 | } | ||
433 | |||
434 | static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels, | ||
435 | int line_size) | ||
436 | { | ||
437 | int i; | ||
438 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | ||
439 | |||
440 | /* read the pixels */ | ||
441 | for(i=0;i<2;i++) { | ||
442 | pixels[0] = cm[block[0]]; | ||
443 | pixels[1] = cm[block[1]]; | ||
444 | |||
445 | pixels += line_size; | ||
446 | block += 8; | ||
447 | } | ||
448 | } | ||
449 | |||
450 | static void put_signed_pixels_clamped_c(const DCTELEM *block, | ||
451 | uint8_t *restrict pixels, | ||
452 | int line_size) | ||
453 | { | ||
454 | int i, j; | ||
455 | |||
456 | for (i = 0; i < 8; i++) { | ||
457 | for (j = 0; j < 8; j++) { | ||
458 | if (*block < -128) | ||
459 | *pixels = 0; | ||
460 | else if (*block > 127) | ||
461 | *pixels = 255; | ||
462 | else | ||
463 | *pixels = (uint8_t)(*block + 128); | ||
464 | block++; | ||
465 | pixels++; | ||
466 | } | ||
467 | pixels += (line_size - 8); | ||
468 | } | ||
469 | } | ||
470 | |||
471 | static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, | ||
472 | int line_size) | ||
473 | { | ||
474 | int i; | ||
475 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | ||
476 | |||
477 | /* read the pixels */ | ||
478 | for(i=0;i<8;i++) { | ||
479 | pixels[0] = cm[pixels[0] + block[0]]; | ||
480 | pixels[1] = cm[pixels[1] + block[1]]; | ||
481 | pixels[2] = cm[pixels[2] + block[2]]; | ||
482 | pixels[3] = cm[pixels[3] + block[3]]; | ||
483 | pixels[4] = cm[pixels[4] + block[4]]; | ||
484 | pixels[5] = cm[pixels[5] + block[5]]; | ||
485 | pixels[6] = cm[pixels[6] + block[6]]; | ||
486 | pixels[7] = cm[pixels[7] + block[7]]; | ||
487 | pixels += line_size; | ||
488 | block += 8; | ||
489 | } | ||
490 | } | ||
491 | |||
492 | static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, | ||
493 | int line_size) | ||
494 | { | ||
495 | int i; | ||
496 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | ||
497 | |||
498 | /* read the pixels */ | ||
499 | for(i=0;i<4;i++) { | ||
500 | pixels[0] = cm[pixels[0] + block[0]]; | ||
501 | pixels[1] = cm[pixels[1] + block[1]]; | ||
502 | pixels[2] = cm[pixels[2] + block[2]]; | ||
503 | pixels[3] = cm[pixels[3] + block[3]]; | ||
504 | pixels += line_size; | ||
505 | block += 8; | ||
506 | } | ||
507 | } | ||
508 | |||
509 | static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels, | ||
510 | int line_size) | ||
511 | { | ||
512 | int i; | ||
513 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | ||
514 | |||
515 | /* read the pixels */ | ||
516 | for(i=0;i<2;i++) { | ||
517 | pixels[0] = cm[pixels[0] + block[0]]; | ||
518 | pixels[1] = cm[pixels[1] + block[1]]; | ||
519 | pixels += line_size; | ||
520 | block += 8; | ||
521 | } | ||
522 | } | ||
523 | |||
524 | static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size) | ||
525 | { | ||
526 | int i; | ||
527 | for(i=0;i<8;i++) { | ||
528 | pixels[0] += block[0]; | ||
529 | pixels[1] += block[1]; | ||
530 | pixels[2] += block[2]; | ||
531 | pixels[3] += block[3]; | ||
532 | pixels[4] += block[4]; | ||
533 | pixels[5] += block[5]; | ||
534 | pixels[6] += block[6]; | ||
535 | pixels[7] += block[7]; | ||
536 | pixels += line_size; | ||
537 | block += 8; | ||
538 | } | ||
539 | } | ||
540 | |||
541 | static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size) | ||
542 | { | ||
543 | int i; | ||
544 | for(i=0;i<4;i++) { | ||
545 | pixels[0] += block[0]; | ||
546 | pixels[1] += block[1]; | ||
547 | pixels[2] += block[2]; | ||
548 | pixels[3] += block[3]; | ||
549 | pixels += line_size; | ||
550 | block += 4; | ||
551 | } | ||
552 | } | ||
553 | |||
554 | static int sum_abs_dctelem_c(DCTELEM *block) | ||
555 | { | ||
556 | int sum=0, i; | ||
557 | for(i=0; i<64; i++) | ||
558 | sum+= FFABS(block[i]); | ||
559 | return sum; | ||
560 | } | ||
561 | |||
562 | #if 0 | ||
563 | |||
/*
 * 64-bit-at-a-time variants of the pixel copy/average primitives.
 * This branch is disabled (#if 0 above); the build uses the 32-bit
 * PIXOP2 implementation from the #else branch below.
 *
 * PIXOP2(OPNAME, OP) stamps out a family of OPNAME##_pixels* functions,
 * where OP is either a plain store (op_put) or an average with the
 * destination (op_avg).  The x2/y2/xy2 variants interpolate halfway
 * between neighbouring pixels using SWAR bit tricks on packed bytes;
 * the "no_rnd" variants round down instead of up.
 */
#define PIXOP2(OPNAME, OP) \
static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint64_t*)block), AV_RN64(pixels));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels          );\
        const uint64_t b= AV_RN64(pixels+line_size);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels          );\
        const uint64_t b= AV_RN64(pixels+line_size);\
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    const uint64_t a= AV_RN64(pixels  );\
    const uint64_t b= AV_RN64(pixels+1);\
    uint64_t l0= (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL)\
               + 0x0202020202020202ULL;\
    uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
               + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
    uint64_t l1,h1;\
\
    pixels+=line_size;\
    for(i=0; i<h; i+=2){\
        uint64_t a= AV_RN64(pixels  );\
        uint64_t b= AV_RN64(pixels+1);\
        l1= (a&0x0303030303030303ULL)\
          + (b&0x0303030303030303ULL);\
        h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
          + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
        pixels+=line_size;\
        block +=line_size;\
        a= AV_RN64(pixels  );\
        b= AV_RN64(pixels+1);\
        l0= (a&0x0303030303030303ULL)\
          + (b&0x0303030303030303ULL)\
          + 0x0202020202020202ULL;\
        h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
          + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    const uint64_t a= AV_RN64(pixels  );\
    const uint64_t b= AV_RN64(pixels+1);\
    uint64_t l0= (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL)\
               + 0x0101010101010101ULL;\
    uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
               + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
    uint64_t l1,h1;\
\
    pixels+=line_size;\
    for(i=0; i<h; i+=2){\
        uint64_t a= AV_RN64(pixels  );\
        uint64_t b= AV_RN64(pixels+1);\
        l1= (a&0x0303030303030303ULL)\
          + (b&0x0303030303030303ULL);\
        h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
          + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
        pixels+=line_size;\
        block +=line_size;\
        a= AV_RN64(pixels  );\
        b= AV_RN64(pixels+1);\
        l0= (a&0x0303030303030303ULL)\
          + (b&0x0303030303030303ULL)\
          + 0x0101010101010101ULL;\
        h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
          + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels_c    , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)

/* Rounding-down byte-wise average packed into one 64-bit word. */
#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
704 | #else // 64 bit variant | ||
705 | |||
/*
 * 32-bit variant of the PIXOP2 generator (the branch actually compiled).
 *
 * PIXOP2(OPNAME, OP) stamps out OPNAME##_pixels{2,4,8,16}* copy/average
 * primitives for widths 2/4/8/16.  OP is the final write operation:
 * op_put stores the value, op_avg averages it with the destination.
 * The _l2/_l4 helpers blend 2 or 4 source rows/pointers; the x2/y2/xy2
 * variants are half-pel interpolators built from them using SWAR byte
 * arithmetic on packed 32-bit words (rnd_avg32 rounds up, no_rnd_avg32
 * rounds down).
 */
#define PIXOP2(OPNAME, OP) \
static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint16_t*)(block  )), AV_RN16(pixels  ));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
        OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_c(block, pixels, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN32(&src1[i*src_stride1  ]);\
        b= AV_RN32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), no_rnd_avg32(a, b));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                         int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN32(&src1[i*src_stride1  ]);\
        b= AV_RN32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                         int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN32(&src1[i*src_stride1  ]);\
        b= AV_RN32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                         int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN16(&src1[i*src_stride1  ]);\
        b= AV_RN16(&src2[i*src_stride2  ]);\
        OP(*((uint16_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                          int src_stride1, int src_stride2, int h){\
    OPNAME ## _pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
    OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                 int src_stride1, int src_stride2, int h){\
    OPNAME ## _no_rnd_pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
    OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        a= AV_RN32(&src1[i*src_stride1]);\
        b= AV_RN32(&src2[i*src_stride2]);\
        c= AV_RN32(&src3[i*src_stride3]);\
        d= AV_RN32(&src4[i*src_stride4]);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1= (c&0x03030303UL)\
          + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        c= AV_RN32(&src3[i*src_stride3+4]);\
        d= AV_RN32(&src4[i*src_stride4+4]);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1= (c&0x03030303UL)\
          + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    }\
}\
\
static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        a= AV_RN32(&src1[i*src_stride1]);\
        b= AV_RN32(&src2[i*src_stride2]);\
        c= AV_RN32(&src3[i*src_stride3]);\
        d= AV_RN32(&src4[i*src_stride4]);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x01010101UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1= (c&0x03030303UL)\
          + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        c= AV_RN32(&src3[i*src_stride3+4]);\
        d= AV_RN32(&src4[i*src_stride4+4]);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x01010101UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1= (c&0x03030303UL)\
          + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    }\
}\
static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    OPNAME ## _pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    OPNAME ## _no_rnd_pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
\
static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i, a0, b0, a1, b1;\
    a0= pixels[0];\
    b0= pixels[1] + 2;\
    a0 += b0;\
    b0 += pixels[2];\
\
    pixels+=line_size;\
    for(i=0; i<h; i+=2){\
        a1= pixels[0];\
        b1= pixels[1];\
        a1 += b1;\
        b1 += pixels[2];\
\
        block[0]= (a1+a0)>>2; /* FIXME non put */\
        block[1]= (b1+b0)>>2;\
\
        pixels+=line_size;\
        block +=line_size;\
\
        a0= pixels[0];\
        b0= pixels[1] + 2;\
        a0 += b0;\
        b0 += pixels[2];\
\
        block[0]= (a1+a0)>>2;\
        block[1]= (b1+b0)>>2;\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    const uint32_t a= AV_RN32(pixels  );\
    const uint32_t b= AV_RN32(pixels+1);\
    uint32_t l0= (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x02020202UL;\
    uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
               + ((b&0xFCFCFCFCUL)>>2);\
    uint32_t l1,h1;\
\
    pixels+=line_size;\
    for(i=0; i<h; i+=2){\
        uint32_t a= AV_RN32(pixels  );\
        uint32_t b= AV_RN32(pixels+1);\
        l1= (a&0x03030303UL)\
          + (b&0x03030303UL);\
        h1= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        pixels+=line_size;\
        block +=line_size;\
        a= AV_RN32(pixels  );\
        b= AV_RN32(pixels+1);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= AV_RN32(pixels  );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0= (a&0x03030303UL)\
                   + (b&0x03030303UL)\
                   + 0x02020202UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels  );\
            uint32_t b= AV_RN32(pixels+1);\
            l1= (a&0x03030303UL)\
              + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels  );\
            b= AV_RN32(pixels+1);\
            l0= (a&0x03030303UL)\
              + (b&0x03030303UL)\
              + 0x02020202UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= AV_RN32(pixels  );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0= (a&0x03030303UL)\
                   + (b&0x03030303UL)\
                   + 0x01010101UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels  );\
            uint32_t b= AV_RN32(pixels+1);\
            l1= (a&0x03030303UL)\
              + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels  );\
            b= AV_RN32(pixels+1);\
            l0= (a&0x03030303UL)\
              + (b&0x03030303UL)\
              + 0x01010101UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
CALL_2X_PIXELS(OPNAME ## _pixels16_c  , OPNAME ## _pixels8_c  , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c  , OPNAME ## _pixels8_c         , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\

/* Rounding-up byte-wise average of two packed 32-bit words. */
#define op_avg(a, b) a = rnd_avg32(a, b)
1073 | #endif | ||
/* Plain store, used to generate the put_* family. */
#define op_put(a, b) a = b

/* Instantiate both function families, then drop the helper macros. */
PIXOP2(avg, op_avg)
PIXOP2(put, op_put)
#undef op_avg
#undef op_put
1080 | |||
/* Scalar rounding averages of 2 and 4 values. */
#define avg2(a,b) ((a+b+1)>>1)
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)

/* Convenience wrappers: average two equally-strided sources into dst
   using the no-rounding PIXOP2-generated blenders. */
static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h);
}

static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h);
}
1091 | |||
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    /* Bilinear prediction of an 8-pixel-wide strip: x16/y16 are the
       fractional positions in 1/16-pel units, so the four corner weights
       below always sum to 256 and the >>8 renormalises. */
    const int A = (16 - x16) * (16 - y16);
    const int B = (     x16) * (16 - y16);
    const int C = (16 - x16) * (     y16);
    const int D = (     x16) * (     y16);
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++) {
            dst[col] = (A * src[col] +
                        B * src[col + 1] +
                        C * src[stride + col] +
                        D * src[stride + col + 1] +
                        rounder) >> 8;
        }
        dst += stride;
        src += stride;
    }
}
1114 | |||
/**
 * Global motion compensation over an 8-pixel-wide strip with a per-pixel
 * affine vector field.
 *
 * ox/oy is the start position and dxx/dyx (per pixel) plus dxy/dyy (per
 * line) are the increments, in 16.16 fixed point (positions are extracted
 * with >>16 below).  'shift' selects how many low bits of the integer
 * position are sub-pel fraction (s = 1<<shift), 'r' is the bilinear
 * rounding constant, and width/height bound the valid source area;
 * positions outside are clamped to the nearest edge pixel via av_clip().
 */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;

    /* switch to inclusive maximum coordinates for the clamping below */
    width--;
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){ //XXX FIXME optimize
            int src_x, src_y, frac_x, frac_y, index;

            src_x= vx>>16;
            src_y= vy>>16;
            /* NOTE(review): the fraction is taken from the low 'shift' bits
               of the integer position (after >>16, before >>shift) — this
               matches the upstream ffmpeg code; confirm against it if in
               doubt. */
            frac_x= src_x&(s-1);
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    /* fully inside: bilinear blend of the 2x2 neighbourhood */
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*(s-frac_y)
                                       + (  src[index+stride  ]*(s-frac_x)
                                          + src[index+stride+1]*   frac_x )*   frac_y
                                       + r)>>(shift*2);
                }else{
                    /* vertically outside: clamp y, interpolate horizontally only */
                    index= src_x + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*s
                                       + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    /* horizontally outside: clamp x, interpolate vertically only */
                    index= av_clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_y)
                                          + src[index+stride  ]*   frac_y )*s
                                       + r)>>(shift*2);
                }else{
                    /* outside on both axes: nearest clamped pixel */
                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]=    src[index         ];
                }
            }

            /* advance the vector along the row ... */
            vx+= dxx;
            vy+= dyx;
        }
        /* ... and down to the next row */
        ox += dxy;
        oy += dyy;
    }
}
1172 | |||
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Full-pel copy: dispatch to the width-specialised copy routine.
       Widths other than 2/4/8/16 are ignored, as in the original switch. */
    if (width == 2)
        put_pixels2_c (dst, src, stride, height);
    else if (width == 4)
        put_pixels4_c (dst, src, stride, height);
    else if (width == 8)
        put_pixels8_c (dst, src, stride, height);
    else if (width == 16)
        put_pixels16_c(dst, src, stride, height);
}
1181 | |||
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Horizontal 1/3-offset filter: weights 2:1 on src[x], src[x+1],
       scaled by 683/2048 (~1/3) with rounding. */
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (683 * (2 * src[x] + src[x + 1] + 1)) >> 11;
}
1192 | |||
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Horizontal 2/3-offset filter: weights 1:2 on src[x], src[x+1],
       scaled by 683/2048 (~1/3) with rounding. */
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (683 * (src[x] + 2 * src[x + 1] + 1)) >> 11;
}
1203 | |||
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Vertical 1/3-offset filter: weights 2:1 on src[x], src[x+stride],
       scaled by 683/2048 (~1/3) with rounding. */
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (683 * (2 * src[x] + src[x + stride] + 1)) >> 11;
}
1214 | |||
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Diagonal (1/3,1/3) filter: 2x2 neighbourhood with weights 4:3:3:2,
       normalised by 2731/32768 (~1/12) with rounding. */
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (4 * src[x] + 3 * src[x + 1] +
                              3 * src[x + stride] + 2 * src[x + stride + 1] + 6)) >> 15;
}
1225 | |||
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Diagonal (1/3,2/3) filter: 2x2 neighbourhood with weights 3:2:4:3,
       normalised by 2731/32768 (~1/12) with rounding. */
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (3 * src[x] + 2 * src[x + 1] +
                              4 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15;
}
1236 | |||
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Vertical 2/3-offset filter: weights 1:2 on src[x], src[x+stride],
       scaled by 683/2048 (~1/3) with rounding. */
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (683 * (src[x] + 2 * src[x + stride] + 1)) >> 11;
}
1247 | |||
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Diagonal (2/3,1/3) filter: 2x2 neighbourhood with weights 3:4:2:3,
       normalised by 2731/32768 (~1/12) with rounding. */
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (3 * src[x] + 4 * src[x + 1] +
                              2 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15;
}
1258 | |||
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Diagonal (2/3,2/3) filter: 2x2 neighbourhood with weights 2:3:3:4,
       normalised by 2731/32768 (~1/12) with rounding. */
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (2 * src[x] + 3 * src[x + 1] +
                              3 * src[x + stride] + 4 * src[x + stride + 1] + 6)) >> 15;
}
1269 | |||
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Full-pel average: dispatch to the width-specialised averager.
       Widths other than 2/4/8/16 are ignored, as in the original switch. */
    if (width == 2)
        avg_pixels2_c (dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_c (dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_c (dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_c(dst, src, stride, height);
}
1278 | |||
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Interpolate as in put_tpel_pixels_mc10_c, then average with the
       existing destination, rounding up. */
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++) {
            int t = (683 * (2 * src[x] + src[x + 1] + 1)) >> 11;
            dst[x] = (dst[x] + t + 1) >> 1;
        }
}
1289 | |||
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Interpolate as in put_tpel_pixels_mc20_c, then average with the
       existing destination, rounding up. */
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++) {
            int t = (683 * (src[x] + 2 * src[x + 1] + 1)) >> 11;
            dst[x] = (dst[x] + t + 1) >> 1;
        }
}
1300 | |||
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Interpolate as in put_tpel_pixels_mc01_c, then average with the
       existing destination, rounding up. */
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++) {
            int t = (683 * (2 * src[x] + src[x + stride] + 1)) >> 11;
            dst[x] = (dst[x] + t + 1) >> 1;
        }
}
1311 | |||
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Interpolate as in put_tpel_pixels_mc11_c, then average with the
       existing destination, rounding up. */
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++) {
            int t = (2731 * (4 * src[x] + 3 * src[x + 1] +
                             3 * src[x + stride] + 2 * src[x + stride + 1] + 6)) >> 15;
            dst[x] = (dst[x] + t + 1) >> 1;
        }
}
1322 | |||
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Interpolate as in put_tpel_pixels_mc12_c, then average with the
       existing destination, rounding up. */
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++) {
            int t = (2731 * (3 * src[x] + 2 * src[x + 1] +
                             4 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15;
            dst[x] = (dst[x] + t + 1) >> 1;
        }
}
1333 | |||
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Interpolate as in put_tpel_pixels_mc02_c, then average with the
       existing destination, rounding up. */
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++) {
            int t = (683 * (src[x] + 2 * src[x + stride] + 1)) >> 11;
            dst[x] = (dst[x] + t + 1) >> 1;
        }
}
1344 | |||
/*
 * Third-pel MC, offset (2/3, 1/3): bilinear blend of the 2x2 neighbourhood
 * with weights 3:4:2:3 (sum 12; 2731/32768 is the fixed-point divide-by-12),
 * then average the result into dst with upward rounding.
 */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int pel = (2731*(3*src[col] + 4*src[col+1] +
                                   2*src[col+stride] + 3*src[col+stride+1] + 6)) >> 15;
            dst[col] = (dst[col] + pel + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1355 | |||
/*
 * Third-pel MC, offset (2/3, 2/3): bilinear blend of the 2x2 neighbourhood
 * with weights 2:3:3:4 (sum 12; 2731/32768 is the fixed-point divide-by-12),
 * then average the result into dst with upward rounding.
 */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int pel = (2731*(2*src[col] + 3*src[col+1] +
                                   3*src[col+stride] + 4*src[col+stride+1] + 6)) >> 15;
            dst[col] = (dst[col] + pel + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
#if 0
/*
 * Disabled generator for width-specialized wrappers around the generic
 * third-pel routines above.
 * NOTE(review): the original expansion prefixed every forwarding statement
 * with "void", which would make the compiler parse it as a malformed
 * declaration rather than a call; the stray "void" is removed here so the
 * wrappers would actually forward if this block were ever enabled.
 */
#define TPEL_WIDTH(width)\
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
#endif
1387 | |||
/*
 * Generator for H.264 chroma motion-compensation kernels 2, 4 and 8 pixels
 * wide.  (x,y) is the eighth-pel fractional offset, asserted to lie in
 * 0..7; the bilinear weights A,B,C,D sum to 64, so OP receives a value
 * scaled by 64.  OP defines how that value is stored (see op_put/op_avg
 * below).  When D==0 the offset is purely horizontal (C==0) or purely
 * vertical (B==0), so only two taps per pixel are needed: E folds the
 * single non-zero off-pixel weight and step selects the neighbour
 * (next column or next row).
 */
#define H264_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=( x)*(8-y);\
    const int C=(8-x)*( y);\
    const int D=( x)*( y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        /* two-tap fast path (offset is axis-aligned) */\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=( x)*(8-y);\
    const int C=(8-x)*( y);\
    const int D=( x)*( y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        /* two-tap fast path (offset is axis-aligned) */\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=( x)*(8-y);\
    const int C=(8-x)*( y);\
    const int D=( x)*( y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        /* two-tap fast path (offset is axis-aligned) */\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            OP(dst[4], (A*src[4] + E*src[step+4]));\
            OP(dst[5], (A*src[5] + E*src[step+5]));\
            OP(dst[6], (A*src[6] + E*src[step+6]));\
            OP(dst[7], (A*src[7] + E*src[step+7]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}
1488 | |||
/* Store operators used to instantiate H264_CHROMA_MC.  Both round the
 * 64-scaled sum with +32 before the >>6; op_avg additionally averages the
 * rounded result with the existing destination pixel (rounding up).
 * The macros are #undef'd immediately after instantiation to keep them
 * out of the rest of the file. */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_ , op_put)
H264_CHROMA_MC(avg_ , op_avg)
#undef op_avg
#undef op_put
1496 | |||
/*
 * H.264 chroma MC, 8 pixels wide, "no rounding" variant: uses a bias of
 * 32-4 = 28 instead of 32 before the >>6, i.e. it rounds slightly lower
 * than put_h264_chroma_mc8_c.  (x,y) is the eighth-pel fractional offset
 * (asserted to be in 0..7); the bilinear weights A..D sum to 64.
 */
static void put_no_rnd_h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    const int A=(8-x)*(8-y);
    const int B=( x)*(8-y);
    const int C=(8-x)*( y);
    const int D=( x)*( y);
    int row, col;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for(row=0; row<h; row++)
    {
        /* bilinear blend of the four neighbouring samples, bias 28 */
        for(col=0; col<8; col++)
            dst[col] = (A*src[col]        + B*src[col+1] +
                        C*src[stride+col] + D*src[stride+col+1] + 32 - 4) >> 6;
        dst+= stride;
        src+= stride;
    }
}
1520 | |||
1521 | #define QPEL_MC(r, OPNAME, RND, OP) \ | ||
1522 | static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ | ||
1523 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ | ||
1524 | int i;\ | ||
1525 | for(i=0; i<h; i++)\ | ||
1526 | {\ | ||
1527 | OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\ | ||
1528 | OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\ | ||
1529 | OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\ | ||
1530 | OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\ | ||
1531 | OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\ | ||
1532 | OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\ | ||
1533 | OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\ | ||
1534 | OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\ | ||
1535 | dst+=dstStride;\ | ||
1536 | src+=srcStride;\ | ||
1537 | }\ | ||
1538 | }\ | ||
1539 | \ | ||
1540 | static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | ||
1541 | const int w=8;\ | ||
1542 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ | ||
1543 | int i;\ | ||
1544 | for(i=0; i<w; i++)\ | ||
1545 | {\ | ||
1546 | const int src0= src[0*srcStride];\ | ||
1547 | const int src1= src[1*srcStride];\ | ||
1548 | const int src2= src[2*srcStride];\ | ||
1549 | const int src3= src[3*srcStride];\ | ||
1550 | const int src4= src[4*srcStride];\ | ||
1551 | const int src5= src[5*srcStride];\ | ||
1552 | const int src6= src[6*srcStride];\ | ||
1553 | const int src7= src[7*srcStride];\ | ||
1554 | const int src8= src[8*srcStride];\ | ||
1555 | OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\ | ||
1556 | OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\ | ||
1557 | OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\ | ||
1558 | OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\ | ||
1559 | OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\ | ||
1560 | OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\ | ||
1561 | OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\ | ||
1562 | OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\ | ||
1563 | dst++;\ | ||
1564 | src++;\ | ||
1565 | }\ | ||
1566 | }\ | ||
1567 | \ | ||
1568 | static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ | ||
1569 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ | ||
1570 | int i;\ | ||
1571 | \ | ||
1572 | for(i=0; i<h; i++)\ | ||
1573 | {\ | ||
1574 | OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\ | ||
1575 | OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\ | ||
1576 | OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\ | ||
1577 | OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\ | ||
1578 | OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\ | ||
1579 | OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\ | ||
1580 | OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\ | ||
1581 | OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\ | ||
1582 | OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\ | ||
1583 | OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\ | ||
1584 | OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\ | ||
1585 | OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\ | ||
1586 | OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\ | ||
1587 | OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\ | ||
1588 | OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\ | ||
1589 | OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\ | ||
1590 | dst+=dstStride;\ | ||
1591 | src+=srcStride;\ | ||
1592 | }\ | ||
1593 | }\ | ||
1594 | \ | ||
1595 | static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | ||
1596 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ | ||
1597 | int i;\ | ||
1598 | const int w=16;\ | ||
1599 | for(i=0; i<w; i++)\ | ||
1600 | {\ | ||
1601 | const int src0= src[0*srcStride];\ | ||
1602 | const int src1= src[1*srcStride];\ | ||
1603 | const int src2= src[2*srcStride];\ | ||
1604 | const int src3= src[3*srcStride];\ | ||
1605 | const int src4= src[4*srcStride];\ | ||
1606 | const int src5= src[5*srcStride];\ | ||
1607 | const int src6= src[6*srcStride];\ | ||
1608 | const int src7= src[7*srcStride];\ | ||
1609 | const int src8= src[8*srcStride];\ | ||
1610 | const int src9= src[9*srcStride];\ | ||
1611 | const int src10= src[10*srcStride];\ | ||
1612 | const int src11= src[11*srcStride];\ | ||
1613 | const int src12= src[12*srcStride];\ | ||
1614 | const int src13= src[13*srcStride];\ | ||
1615 | const int src14= src[14*srcStride];\ | ||
1616 | const int src15= src[15*srcStride];\ | ||
1617 | const int src16= src[16*srcStride];\ | ||
1618 | OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\ | ||
1619 | OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\ | ||
1620 | OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\ | ||
1621 | OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\ | ||
1622 | OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\ | ||
1623 | OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\ | ||
1624 | OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\ | ||
1625 | OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\ | ||
1626 | OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\ | ||
1627 | OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\ | ||
1628 | OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\ | ||
1629 | OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\ | ||
1630 | OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\ | ||
1631 | OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\ | ||
1632 | OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\ | ||
1633 | OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\ | ||
1634 | dst++;\ | ||
1635 | src++;\ | ||
1636 | }\ | ||
1637 | }\ | ||
1638 | \ | ||
1639 | static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\ | ||
1640 | OPNAME ## pixels8_c(dst, src, stride, 8);\ | ||
1641 | }\ | ||
1642 | \ | ||
1643 | static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1644 | uint8_t half[64];\ | ||
1645 | put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\ | ||
1646 | OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\ | ||
1647 | }\ | ||
1648 | \ | ||
1649 | static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1650 | OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\ | ||
1651 | }\ | ||
1652 | \ | ||
1653 | static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1654 | uint8_t half[64];\ | ||
1655 | put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\ | ||
1656 | OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\ | ||
1657 | }\ | ||
1658 | \ | ||
1659 | static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1660 | uint8_t full[16*9];\ | ||
1661 | uint8_t half[64];\ | ||
1662 | copy_block9(full, src, 16, stride, 9);\ | ||
1663 | put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\ | ||
1664 | OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\ | ||
1665 | }\ | ||
1666 | \ | ||
1667 | static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1668 | uint8_t full[16*9];\ | ||
1669 | copy_block9(full, src, 16, stride, 9);\ | ||
1670 | OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\ | ||
1671 | }\ | ||
1672 | \ | ||
1673 | static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1674 | uint8_t full[16*9];\ | ||
1675 | uint8_t half[64];\ | ||
1676 | copy_block9(full, src, 16, stride, 9);\ | ||
1677 | put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\ | ||
1678 | OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\ | ||
1679 | }\ | ||
1680 | void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1681 | uint8_t full[16*9];\ | ||
1682 | uint8_t halfH[72];\ | ||
1683 | uint8_t halfV[64];\ | ||
1684 | uint8_t halfHV[64];\ | ||
1685 | copy_block9(full, src, 16, stride, 9);\ | ||
1686 | put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | ||
1687 | put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ | ||
1688 | put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | ||
1689 | OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ | ||
1690 | }\ | ||
1691 | static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1692 | uint8_t full[16*9];\ | ||
1693 | uint8_t halfH[72];\ | ||
1694 | uint8_t halfHV[64];\ | ||
1695 | copy_block9(full, src, 16, stride, 9);\ | ||
1696 | put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | ||
1697 | put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\ | ||
1698 | put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | ||
1699 | OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\ | ||
1700 | }\ | ||
1701 | void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1702 | uint8_t full[16*9];\ | ||
1703 | uint8_t halfH[72];\ | ||
1704 | uint8_t halfV[64];\ | ||
1705 | uint8_t halfHV[64];\ | ||
1706 | copy_block9(full, src, 16, stride, 9);\ | ||
1707 | put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | ||
1708 | put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ | ||
1709 | put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | ||
1710 | OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ | ||
1711 | }\ | ||
1712 | static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1713 | uint8_t full[16*9];\ | ||
1714 | uint8_t halfH[72];\ | ||
1715 | uint8_t halfHV[64];\ | ||
1716 | copy_block9(full, src, 16, stride, 9);\ | ||
1717 | put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | ||
1718 | put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\ | ||
1719 | put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | ||
1720 | OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\ | ||
1721 | }\ | ||
1722 | void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1723 | uint8_t full[16*9];\ | ||
1724 | uint8_t halfH[72];\ | ||
1725 | uint8_t halfV[64];\ | ||
1726 | uint8_t halfHV[64];\ | ||
1727 | copy_block9(full, src, 16, stride, 9);\ | ||
1728 | put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | ||
1729 | put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ | ||
1730 | put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | ||
1731 | OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ | ||
1732 | }\ | ||
1733 | static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1734 | uint8_t full[16*9];\ | ||
1735 | uint8_t halfH[72];\ | ||
1736 | uint8_t halfHV[64];\ | ||
1737 | copy_block9(full, src, 16, stride, 9);\ | ||
1738 | put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | ||
1739 | put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\ | ||
1740 | put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | ||
1741 | OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\ | ||
1742 | }\ | ||
1743 | void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1744 | uint8_t full[16*9];\ | ||
1745 | uint8_t halfH[72];\ | ||
1746 | uint8_t halfV[64];\ | ||
1747 | uint8_t halfHV[64];\ | ||
1748 | copy_block9(full, src, 16, stride, 9);\ | ||
1749 | put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\ | ||
1750 | put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ | ||
1751 | put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | ||
1752 | OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ | ||
1753 | }\ | ||
1754 | static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1755 | uint8_t full[16*9];\ | ||
1756 | uint8_t halfH[72];\ | ||
1757 | uint8_t halfHV[64];\ | ||
1758 | copy_block9(full, src, 16, stride, 9);\ | ||
1759 | put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | ||
1760 | put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\ | ||
1761 | put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | ||
1762 | OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\ | ||
1763 | }\ | ||
1764 | static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1765 | uint8_t halfH[72];\ | ||
1766 | uint8_t halfHV[64];\ | ||
1767 | put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ | ||
1768 | put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | ||
1769 | OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\ | ||
1770 | }\ | ||
1771 | static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1772 | uint8_t halfH[72];\ | ||
1773 | uint8_t halfHV[64];\ | ||
1774 | put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ | ||
1775 | put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | ||
1776 | OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\ | ||
1777 | }\ | ||
1778 | void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1779 | uint8_t full[16*9];\ | ||
1780 | uint8_t halfH[72];\ | ||
1781 | uint8_t halfV[64];\ | ||
1782 | uint8_t halfHV[64];\ | ||
1783 | copy_block9(full, src, 16, stride, 9);\ | ||
1784 | put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | ||
1785 | put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ | ||
1786 | put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | ||
1787 | OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\ | ||
1788 | }\ | ||
1789 | static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1790 | uint8_t full[16*9];\ | ||
1791 | uint8_t halfH[72];\ | ||
1792 | copy_block9(full, src, 16, stride, 9);\ | ||
1793 | put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | ||
1794 | put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\ | ||
1795 | OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ | ||
1796 | }\ | ||
1797 | void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1798 | uint8_t full[16*9];\ | ||
1799 | uint8_t halfH[72];\ | ||
1800 | uint8_t halfV[64];\ | ||
1801 | uint8_t halfHV[64];\ | ||
1802 | copy_block9(full, src, 16, stride, 9);\ | ||
1803 | put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | ||
1804 | put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ | ||
1805 | put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | ||
1806 | OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\ | ||
1807 | }\ | ||
1808 | static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1809 | uint8_t full[16*9];\ | ||
1810 | uint8_t halfH[72];\ | ||
1811 | copy_block9(full, src, 16, stride, 9);\ | ||
1812 | put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | ||
1813 | put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\ | ||
1814 | OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ | ||
1815 | }\ | ||
1816 | static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1817 | uint8_t halfH[72];\ | ||
1818 | put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ | ||
1819 | OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ | ||
1820 | }\ | ||
1821 | static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\ | ||
1822 | OPNAME ## pixels16_c(dst, src, stride, 16);\ | ||
1823 | }\ | ||
1824 | \ | ||
1825 | static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1826 | uint8_t half[256];\ | ||
1827 | put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\ | ||
1828 | OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\ | ||
1829 | }\ | ||
1830 | \ | ||
1831 | static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1832 | OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\ | ||
1833 | }\ | ||
1834 | \ | ||
1835 | static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1836 | uint8_t half[256];\ | ||
1837 | put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\ | ||
1838 | OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\ | ||
1839 | }\ | ||
1840 | \ | ||
1841 | static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1842 | uint8_t full[24*17];\ | ||
1843 | uint8_t half[256];\ | ||
1844 | copy_block17(full, src, 24, stride, 17);\ | ||
1845 | put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\ | ||
1846 | OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\ | ||
1847 | }\ | ||
1848 | \ | ||
1849 | static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1850 | uint8_t full[24*17];\ | ||
1851 | copy_block17(full, src, 24, stride, 17);\ | ||
1852 | OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\ | ||
1853 | }\ | ||
1854 | \ | ||
1855 | static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1856 | uint8_t full[24*17];\ | ||
1857 | uint8_t half[256];\ | ||
1858 | copy_block17(full, src, 24, stride, 17);\ | ||
1859 | put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\ | ||
1860 | OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\ | ||
1861 | }\ | ||
1862 | void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1863 | uint8_t full[24*17];\ | ||
1864 | uint8_t halfH[272];\ | ||
1865 | uint8_t halfV[256];\ | ||
1866 | uint8_t halfHV[256];\ | ||
1867 | copy_block17(full, src, 24, stride, 17);\ | ||
1868 | put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | ||
1869 | put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ | ||
1870 | put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | ||
1871 | OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ | ||
1872 | }\ | ||
1873 | static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1874 | uint8_t full[24*17];\ | ||
1875 | uint8_t halfH[272];\ | ||
1876 | uint8_t halfHV[256];\ | ||
1877 | copy_block17(full, src, 24, stride, 17);\ | ||
1878 | put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | ||
1879 | put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\ | ||
1880 | put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | ||
1881 | OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\ | ||
1882 | }\ | ||
1883 | void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1884 | uint8_t full[24*17];\ | ||
1885 | uint8_t halfH[272];\ | ||
1886 | uint8_t halfV[256];\ | ||
1887 | uint8_t halfHV[256];\ | ||
1888 | copy_block17(full, src, 24, stride, 17);\ | ||
1889 | put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | ||
1890 | put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ | ||
1891 | put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | ||
1892 | OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ | ||
1893 | }\ | ||
1894 | static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1895 | uint8_t full[24*17];\ | ||
1896 | uint8_t halfH[272];\ | ||
1897 | uint8_t halfHV[256];\ | ||
1898 | copy_block17(full, src, 24, stride, 17);\ | ||
1899 | put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | ||
1900 | put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\ | ||
1901 | put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | ||
1902 | OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\ | ||
1903 | }\ | ||
1904 | void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1905 | uint8_t full[24*17];\ | ||
1906 | uint8_t halfH[272];\ | ||
1907 | uint8_t halfV[256];\ | ||
1908 | uint8_t halfHV[256];\ | ||
1909 | copy_block17(full, src, 24, stride, 17);\ | ||
1910 | put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | ||
1911 | put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ | ||
1912 | put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | ||
1913 | OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ | ||
1914 | }\ | ||
1915 | static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1916 | uint8_t full[24*17];\ | ||
1917 | uint8_t halfH[272];\ | ||
1918 | uint8_t halfHV[256];\ | ||
1919 | copy_block17(full, src, 24, stride, 17);\ | ||
1920 | put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | ||
1921 | put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\ | ||
1922 | put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | ||
1923 | OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\ | ||
1924 | }\ | ||
1925 | void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1926 | uint8_t full[24*17];\ | ||
1927 | uint8_t halfH[272];\ | ||
1928 | uint8_t halfV[256];\ | ||
1929 | uint8_t halfHV[256];\ | ||
1930 | copy_block17(full, src, 24, stride, 17);\ | ||
1931 | put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\ | ||
1932 | put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ | ||
1933 | put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | ||
1934 | OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ | ||
1935 | }\ | ||
1936 | static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1937 | uint8_t full[24*17];\ | ||
1938 | uint8_t halfH[272];\ | ||
1939 | uint8_t halfHV[256];\ | ||
1940 | copy_block17(full, src, 24, stride, 17);\ | ||
1941 | put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | ||
1942 | put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\ | ||
1943 | put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | ||
1944 | OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\ | ||
1945 | }\ | ||
1946 | static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1947 | uint8_t halfH[272];\ | ||
1948 | uint8_t halfHV[256];\ | ||
1949 | put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ | ||
1950 | put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | ||
1951 | OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\ | ||
1952 | }\ | ||
1953 | static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1954 | uint8_t halfH[272];\ | ||
1955 | uint8_t halfHV[256];\ | ||
1956 | put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ | ||
1957 | put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | ||
1958 | OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\ | ||
1959 | }\ | ||
1960 | void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1961 | uint8_t full[24*17];\ | ||
1962 | uint8_t halfH[272];\ | ||
1963 | uint8_t halfV[256];\ | ||
1964 | uint8_t halfHV[256];\ | ||
1965 | copy_block17(full, src, 24, stride, 17);\ | ||
1966 | put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | ||
1967 | put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ | ||
1968 | put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | ||
1969 | OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\ | ||
1970 | }\ | ||
1971 | static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1972 | uint8_t full[24*17];\ | ||
1973 | uint8_t halfH[272];\ | ||
1974 | copy_block17(full, src, 24, stride, 17);\ | ||
1975 | put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | ||
1976 | put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\ | ||
1977 | OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ | ||
1978 | }\ | ||
1979 | void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1980 | uint8_t full[24*17];\ | ||
1981 | uint8_t halfH[272];\ | ||
1982 | uint8_t halfV[256];\ | ||
1983 | uint8_t halfHV[256];\ | ||
1984 | copy_block17(full, src, 24, stride, 17);\ | ||
1985 | put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | ||
1986 | put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ | ||
1987 | put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | ||
1988 | OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\ | ||
1989 | }\ | ||
1990 | static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1991 | uint8_t full[24*17];\ | ||
1992 | uint8_t halfH[272];\ | ||
1993 | copy_block17(full, src, 24, stride, 17);\ | ||
1994 | put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | ||
1995 | put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\ | ||
1996 | OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ | ||
1997 | }\ | ||
1998 | static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
1999 | uint8_t halfH[272];\ | ||
2000 | put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ | ||
2001 | OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ | ||
2002 | } | ||
2003 | |||
2004 | #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1) | ||
2005 | #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1) | ||
2006 | #define op_put(a, b) a = cm[((b) + 16)>>5] | ||
2007 | #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5] | ||
2008 | |||
2009 | QPEL_MC(0, put_ , _ , op_put) | ||
2010 | QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd) | ||
2011 | QPEL_MC(0, avg_ , _ , op_avg) | ||
2012 | //QPEL_MC(1, avg_no_rnd , _ , op_avg) | ||
2013 | #undef op_avg | ||
2014 | #undef op_avg_no_rnd | ||
2015 | #undef op_put | ||
2016 | #undef op_put_no_rnd | ||
2017 | |||
2018 | #if 1 | ||
2019 | #define H264_LOWPASS(OPNAME, OP, OP2) \ | ||
2020 | static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | ||
2021 | const int h=2;\ | ||
2022 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ | ||
2023 | int i;\ | ||
2024 | for(i=0; i<h; i++)\ | ||
2025 | {\ | ||
2026 | OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\ | ||
2027 | OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\ | ||
2028 | dst+=dstStride;\ | ||
2029 | src+=srcStride;\ | ||
2030 | }\ | ||
2031 | }\ | ||
2032 | \ | ||
2033 | static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | ||
2034 | const int w=2;\ | ||
2035 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ | ||
2036 | int i;\ | ||
2037 | for(i=0; i<w; i++)\ | ||
2038 | {\ | ||
2039 | const int srcB= src[-2*srcStride];\ | ||
2040 | const int srcA= src[-1*srcStride];\ | ||
2041 | const int src0= src[0 *srcStride];\ | ||
2042 | const int src1= src[1 *srcStride];\ | ||
2043 | const int src2= src[2 *srcStride];\ | ||
2044 | const int src3= src[3 *srcStride];\ | ||
2045 | const int src4= src[4 *srcStride];\ | ||
2046 | OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ | ||
2047 | OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ | ||
2048 | dst++;\ | ||
2049 | src++;\ | ||
2050 | }\ | ||
2051 | }\ | ||
2052 | \ | ||
2053 | static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ | ||
2054 | const int h=2;\ | ||
2055 | const int w=2;\ | ||
2056 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ | ||
2057 | int i;\ | ||
2058 | src -= 2*srcStride;\ | ||
2059 | for(i=0; i<h+5; i++)\ | ||
2060 | {\ | ||
2061 | tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\ | ||
2062 | tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\ | ||
2063 | tmp+=tmpStride;\ | ||
2064 | src+=srcStride;\ | ||
2065 | }\ | ||
2066 | tmp -= tmpStride*(h+5-2);\ | ||
2067 | for(i=0; i<w; i++)\ | ||
2068 | {\ | ||
2069 | const int tmpB= tmp[-2*tmpStride];\ | ||
2070 | const int tmpA= tmp[-1*tmpStride];\ | ||
2071 | const int tmp0= tmp[0 *tmpStride];\ | ||
2072 | const int tmp1= tmp[1 *tmpStride];\ | ||
2073 | const int tmp2= tmp[2 *tmpStride];\ | ||
2074 | const int tmp3= tmp[3 *tmpStride];\ | ||
2075 | const int tmp4= tmp[4 *tmpStride];\ | ||
2076 | OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ | ||
2077 | OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ | ||
2078 | dst++;\ | ||
2079 | tmp++;\ | ||
2080 | }\ | ||
2081 | }\ | ||
2082 | static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | ||
2083 | const int h=4;\ | ||
2084 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ | ||
2085 | int i;\ | ||
2086 | for(i=0; i<h; i++)\ | ||
2087 | {\ | ||
2088 | OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\ | ||
2089 | OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\ | ||
2090 | OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\ | ||
2091 | OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\ | ||
2092 | dst+=dstStride;\ | ||
2093 | src+=srcStride;\ | ||
2094 | }\ | ||
2095 | }\ | ||
2096 | \ | ||
2097 | static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | ||
2098 | const int w=4;\ | ||
2099 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ | ||
2100 | int i;\ | ||
2101 | for(i=0; i<w; i++)\ | ||
2102 | {\ | ||
2103 | const int srcB= src[-2*srcStride];\ | ||
2104 | const int srcA= src[-1*srcStride];\ | ||
2105 | const int src0= src[0 *srcStride];\ | ||
2106 | const int src1= src[1 *srcStride];\ | ||
2107 | const int src2= src[2 *srcStride];\ | ||
2108 | const int src3= src[3 *srcStride];\ | ||
2109 | const int src4= src[4 *srcStride];\ | ||
2110 | const int src5= src[5 *srcStride];\ | ||
2111 | const int src6= src[6 *srcStride];\ | ||
2112 | OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ | ||
2113 | OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ | ||
2114 | OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\ | ||
2115 | OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\ | ||
2116 | dst++;\ | ||
2117 | src++;\ | ||
2118 | }\ | ||
2119 | }\ | ||
2120 | \ | ||
2121 | static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ | ||
2122 | const int h=4;\ | ||
2123 | const int w=4;\ | ||
2124 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ | ||
2125 | int i;\ | ||
2126 | src -= 2*srcStride;\ | ||
2127 | for(i=0; i<h+5; i++)\ | ||
2128 | {\ | ||
2129 | tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\ | ||
2130 | tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\ | ||
2131 | tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\ | ||
2132 | tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\ | ||
2133 | tmp+=tmpStride;\ | ||
2134 | src+=srcStride;\ | ||
2135 | }\ | ||
2136 | tmp -= tmpStride*(h+5-2);\ | ||
2137 | for(i=0; i<w; i++)\ | ||
2138 | {\ | ||
2139 | const int tmpB= tmp[-2*tmpStride];\ | ||
2140 | const int tmpA= tmp[-1*tmpStride];\ | ||
2141 | const int tmp0= tmp[0 *tmpStride];\ | ||
2142 | const int tmp1= tmp[1 *tmpStride];\ | ||
2143 | const int tmp2= tmp[2 *tmpStride];\ | ||
2144 | const int tmp3= tmp[3 *tmpStride];\ | ||
2145 | const int tmp4= tmp[4 *tmpStride];\ | ||
2146 | const int tmp5= tmp[5 *tmpStride];\ | ||
2147 | const int tmp6= tmp[6 *tmpStride];\ | ||
2148 | OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ | ||
2149 | OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ | ||
2150 | OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\ | ||
2151 | OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\ | ||
2152 | dst++;\ | ||
2153 | tmp++;\ | ||
2154 | }\ | ||
2155 | }\ | ||
2156 | \ | ||
2157 | static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | ||
2158 | const int h=8;\ | ||
2159 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ | ||
2160 | int i;\ | ||
2161 | for(i=0; i<h; i++)\ | ||
2162 | {\ | ||
2163 | OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\ | ||
2164 | OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\ | ||
2165 | OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\ | ||
2166 | OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\ | ||
2167 | OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\ | ||
2168 | OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\ | ||
2169 | OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\ | ||
2170 | OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\ | ||
2171 | dst+=dstStride;\ | ||
2172 | src+=srcStride;\ | ||
2173 | }\ | ||
2174 | }\ | ||
2175 | \ | ||
2176 | static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | ||
2177 | const int w=8;\ | ||
2178 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ | ||
2179 | int i;\ | ||
2180 | for(i=0; i<w; i++)\ | ||
2181 | {\ | ||
2182 | const int srcB= src[-2*srcStride];\ | ||
2183 | const int srcA= src[-1*srcStride];\ | ||
2184 | const int src0= src[0 *srcStride];\ | ||
2185 | const int src1= src[1 *srcStride];\ | ||
2186 | const int src2= src[2 *srcStride];\ | ||
2187 | const int src3= src[3 *srcStride];\ | ||
2188 | const int src4= src[4 *srcStride];\ | ||
2189 | const int src5= src[5 *srcStride];\ | ||
2190 | const int src6= src[6 *srcStride];\ | ||
2191 | const int src7= src[7 *srcStride];\ | ||
2192 | const int src8= src[8 *srcStride];\ | ||
2193 | const int src9= src[9 *srcStride];\ | ||
2194 | const int src10=src[10*srcStride];\ | ||
2195 | OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ | ||
2196 | OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ | ||
2197 | OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\ | ||
2198 | OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\ | ||
2199 | OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\ | ||
2200 | OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\ | ||
2201 | OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\ | ||
2202 | OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\ | ||
2203 | dst++;\ | ||
2204 | src++;\ | ||
2205 | }\ | ||
2206 | }\ | ||
2207 | \ | ||
2208 | static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ | ||
2209 | const int h=8;\ | ||
2210 | const int w=8;\ | ||
2211 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ | ||
2212 | int i;\ | ||
2213 | src -= 2*srcStride;\ | ||
2214 | for(i=0; i<h+5; i++)\ | ||
2215 | {\ | ||
2216 | tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\ | ||
2217 | tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\ | ||
2218 | tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\ | ||
2219 | tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\ | ||
2220 | tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\ | ||
2221 | tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\ | ||
2222 | tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\ | ||
2223 | tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\ | ||
2224 | tmp+=tmpStride;\ | ||
2225 | src+=srcStride;\ | ||
2226 | }\ | ||
2227 | tmp -= tmpStride*(h+5-2);\ | ||
2228 | for(i=0; i<w; i++)\ | ||
2229 | {\ | ||
2230 | const int tmpB= tmp[-2*tmpStride];\ | ||
2231 | const int tmpA= tmp[-1*tmpStride];\ | ||
2232 | const int tmp0= tmp[0 *tmpStride];\ | ||
2233 | const int tmp1= tmp[1 *tmpStride];\ | ||
2234 | const int tmp2= tmp[2 *tmpStride];\ | ||
2235 | const int tmp3= tmp[3 *tmpStride];\ | ||
2236 | const int tmp4= tmp[4 *tmpStride];\ | ||
2237 | const int tmp5= tmp[5 *tmpStride];\ | ||
2238 | const int tmp6= tmp[6 *tmpStride];\ | ||
2239 | const int tmp7= tmp[7 *tmpStride];\ | ||
2240 | const int tmp8= tmp[8 *tmpStride];\ | ||
2241 | const int tmp9= tmp[9 *tmpStride];\ | ||
2242 | const int tmp10=tmp[10*tmpStride];\ | ||
2243 | OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ | ||
2244 | OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ | ||
2245 | OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\ | ||
2246 | OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\ | ||
2247 | OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\ | ||
2248 | OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\ | ||
2249 | OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\ | ||
2250 | OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\ | ||
2251 | dst++;\ | ||
2252 | tmp++;\ | ||
2253 | }\ | ||
2254 | }\ | ||
2255 | \ | ||
2256 | static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | ||
2257 | OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\ | ||
2258 | OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\ | ||
2259 | src += 8*srcStride;\ | ||
2260 | dst += 8*dstStride;\ | ||
2261 | OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\ | ||
2262 | OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\ | ||
2263 | }\ | ||
2264 | \ | ||
2265 | static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | ||
2266 | OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\ | ||
2267 | OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\ | ||
2268 | src += 8*srcStride;\ | ||
2269 | dst += 8*dstStride;\ | ||
2270 | OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\ | ||
2271 | OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\ | ||
2272 | }\ | ||
2273 | \ | ||
2274 | static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ | ||
2275 | OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\ | ||
2276 | OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\ | ||
2277 | src += 8*srcStride;\ | ||
2278 | dst += 8*dstStride;\ | ||
2279 | OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\ | ||
2280 | OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\ | ||
2281 | }\ | ||
2282 | |||
2283 | #define H264_MC(OPNAME, SIZE) \ | ||
2284 | static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\ | ||
2285 | OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\ | ||
2286 | }\ | ||
2287 | \ | ||
2288 | static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
2289 | uint8_t half[SIZE*SIZE];\ | ||
2290 | put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\ | ||
2291 | OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\ | ||
2292 | }\ | ||
2293 | \ | ||
2294 | static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
2295 | OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\ | ||
2296 | }\ | ||
2297 | \ | ||
2298 | static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
2299 | uint8_t half[SIZE*SIZE];\ | ||
2300 | put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\ | ||
2301 | OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\ | ||
2302 | }\ | ||
2303 | \ | ||
2304 | static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
2305 | uint8_t full[SIZE*(SIZE+5)];\ | ||
2306 | uint8_t * const full_mid= full + SIZE*2;\ | ||
2307 | uint8_t half[SIZE*SIZE];\ | ||
2308 | copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ | ||
2309 | put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\ | ||
2310 | OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\ | ||
2311 | }\ | ||
2312 | \ | ||
2313 | static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
2314 | uint8_t full[SIZE*(SIZE+5)];\ | ||
2315 | uint8_t * const full_mid= full + SIZE*2;\ | ||
2316 | copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ | ||
2317 | OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\ | ||
2318 | }\ | ||
2319 | \ | ||
2320 | static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
2321 | uint8_t full[SIZE*(SIZE+5)];\ | ||
2322 | uint8_t * const full_mid= full + SIZE*2;\ | ||
2323 | uint8_t half[SIZE*SIZE];\ | ||
2324 | copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ | ||
2325 | put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\ | ||
2326 | OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\ | ||
2327 | }\ | ||
2328 | \ | ||
2329 | static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
2330 | uint8_t full[SIZE*(SIZE+5)];\ | ||
2331 | uint8_t * const full_mid= full + SIZE*2;\ | ||
2332 | uint8_t halfH[SIZE*SIZE];\ | ||
2333 | uint8_t halfV[SIZE*SIZE];\ | ||
2334 | put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\ | ||
2335 | copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ | ||
2336 | put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ | ||
2337 | OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ | ||
2338 | }\ | ||
2339 | \ | ||
2340 | static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
2341 | uint8_t full[SIZE*(SIZE+5)];\ | ||
2342 | uint8_t * const full_mid= full + SIZE*2;\ | ||
2343 | uint8_t halfH[SIZE*SIZE];\ | ||
2344 | uint8_t halfV[SIZE*SIZE];\ | ||
2345 | put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\ | ||
2346 | copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\ | ||
2347 | put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ | ||
2348 | OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ | ||
2349 | }\ | ||
2350 | \ | ||
2351 | static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
2352 | uint8_t full[SIZE*(SIZE+5)];\ | ||
2353 | uint8_t * const full_mid= full + SIZE*2;\ | ||
2354 | uint8_t halfH[SIZE*SIZE];\ | ||
2355 | uint8_t halfV[SIZE*SIZE];\ | ||
2356 | put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\ | ||
2357 | copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ | ||
2358 | put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ | ||
2359 | OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ | ||
2360 | }\ | ||
2361 | \ | ||
2362 | static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
2363 | uint8_t full[SIZE*(SIZE+5)];\ | ||
2364 | uint8_t * const full_mid= full + SIZE*2;\ | ||
2365 | uint8_t halfH[SIZE*SIZE];\ | ||
2366 | uint8_t halfV[SIZE*SIZE];\ | ||
2367 | put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\ | ||
2368 | copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\ | ||
2369 | put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ | ||
2370 | OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ | ||
2371 | }\ | ||
2372 | \ | ||
2373 | static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
2374 | int16_t tmp[SIZE*(SIZE+5)];\ | ||
2375 | OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\ | ||
2376 | }\ | ||
2377 | \ | ||
2378 | static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
2379 | int16_t tmp[SIZE*(SIZE+5)];\ | ||
2380 | uint8_t halfH[SIZE*SIZE];\ | ||
2381 | uint8_t halfHV[SIZE*SIZE];\ | ||
2382 | put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\ | ||
2383 | put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ | ||
2384 | OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\ | ||
2385 | }\ | ||
2386 | \ | ||
2387 | static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
2388 | int16_t tmp[SIZE*(SIZE+5)];\ | ||
2389 | uint8_t halfH[SIZE*SIZE];\ | ||
2390 | uint8_t halfHV[SIZE*SIZE];\ | ||
2391 | put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\ | ||
2392 | put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ | ||
2393 | OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\ | ||
2394 | }\ | ||
2395 | \ | ||
2396 | static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
2397 | uint8_t full[SIZE*(SIZE+5)];\ | ||
2398 | uint8_t * const full_mid= full + SIZE*2;\ | ||
2399 | int16_t tmp[SIZE*(SIZE+5)];\ | ||
2400 | uint8_t halfV[SIZE*SIZE];\ | ||
2401 | uint8_t halfHV[SIZE*SIZE];\ | ||
2402 | copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ | ||
2403 | put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ | ||
2404 | put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ | ||
2405 | OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\ | ||
2406 | }\ | ||
2407 | \ | ||
2408 | static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\ | ||
2409 | uint8_t full[SIZE*(SIZE+5)];\ | ||
2410 | uint8_t * const full_mid= full + SIZE*2;\ | ||
2411 | int16_t tmp[SIZE*(SIZE+5)];\ | ||
2412 | uint8_t halfV[SIZE*SIZE];\ | ||
2413 | uint8_t halfHV[SIZE*SIZE];\ | ||
2414 | copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\ | ||
2415 | put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ | ||
2416 | put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ | ||
2417 | OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\ | ||
2418 | }\ | ||
2419 | |||
2420 | #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1) | ||
2421 | //#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7) | ||
2422 | #define op_put(a, b) a = cm[((b) + 16)>>5] | ||
2423 | #define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1) | ||
2424 | #define op2_put(a, b) a = cm[((b) + 512)>>10] | ||
2425 | |||
2426 | H264_LOWPASS(put_ , op_put, op2_put) | ||
2427 | H264_LOWPASS(avg_ , op_avg, op2_avg) | ||
2428 | H264_MC(put_, 2) | ||
2429 | H264_MC(put_, 4) | ||
2430 | H264_MC(put_, 8) | ||
2431 | H264_MC(put_, 16) | ||
2432 | H264_MC(avg_, 4) | ||
2433 | H264_MC(avg_, 8) | ||
2434 | H264_MC(avg_, 16) | ||
2435 | |||
2436 | #undef op_avg | ||
2437 | #undef op_put | ||
2438 | #undef op2_avg | ||
2439 | #undef op2_put | ||
2440 | #endif | ||
2441 | |||
2442 | #define op_scale1(x) block[x] = av_clip_uint8( (block[x]*weight + offset) >> log2_denom ) | ||
2443 | #define op_scale2(x) dst[x] = av_clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1)) | ||
2444 | #define H264_WEIGHT(W,H) \ | ||
2445 | static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \ | ||
2446 | int y; \ | ||
2447 | offset <<= log2_denom; \ | ||
2448 | if(log2_denom) offset += 1<<(log2_denom-1); \ | ||
2449 | for(y=0; y<H; y++, block += stride){ \ | ||
2450 | op_scale1(0); \ | ||
2451 | op_scale1(1); \ | ||
2452 | if(W==2) continue; \ | ||
2453 | op_scale1(2); \ | ||
2454 | op_scale1(3); \ | ||
2455 | if(W==4) continue; \ | ||
2456 | op_scale1(4); \ | ||
2457 | op_scale1(5); \ | ||
2458 | op_scale1(6); \ | ||
2459 | op_scale1(7); \ | ||
2460 | if(W==8) continue; \ | ||
2461 | op_scale1(8); \ | ||
2462 | op_scale1(9); \ | ||
2463 | op_scale1(10); \ | ||
2464 | op_scale1(11); \ | ||
2465 | op_scale1(12); \ | ||
2466 | op_scale1(13); \ | ||
2467 | op_scale1(14); \ | ||
2468 | op_scale1(15); \ | ||
2469 | } \ | ||
2470 | } \ | ||
2471 | static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \ | ||
2472 | int y; \ | ||
2473 | offset = ((offset + 1) | 1) << log2_denom; \ | ||
2474 | for(y=0; y<H; y++, dst += stride, src += stride){ \ | ||
2475 | op_scale2(0); \ | ||
2476 | op_scale2(1); \ | ||
2477 | if(W==2) continue; \ | ||
2478 | op_scale2(2); \ | ||
2479 | op_scale2(3); \ | ||
2480 | if(W==4) continue; \ | ||
2481 | op_scale2(4); \ | ||
2482 | op_scale2(5); \ | ||
2483 | op_scale2(6); \ | ||
2484 | op_scale2(7); \ | ||
2485 | if(W==8) continue; \ | ||
2486 | op_scale2(8); \ | ||
2487 | op_scale2(9); \ | ||
2488 | op_scale2(10); \ | ||
2489 | op_scale2(11); \ | ||
2490 | op_scale2(12); \ | ||
2491 | op_scale2(13); \ | ||
2492 | op_scale2(14); \ | ||
2493 | op_scale2(15); \ | ||
2494 | } \ | ||
2495 | } | ||
2496 | |||
2497 | H264_WEIGHT(16,16) | ||
2498 | H264_WEIGHT(16,8) | ||
2499 | H264_WEIGHT(8,16) | ||
2500 | H264_WEIGHT(8,8) | ||
2501 | H264_WEIGHT(8,4) | ||
2502 | H264_WEIGHT(4,8) | ||
2503 | H264_WEIGHT(4,4) | ||
2504 | H264_WEIGHT(4,2) | ||
2505 | H264_WEIGHT(2,4) | ||
2506 | H264_WEIGHT(2,2) | ||
2507 | |||
2508 | #undef op_scale1 | ||
2509 | #undef op_scale2 | ||
2510 | #undef H264_WEIGHT | ||
2511 | |||
2512 | static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){ | ||
2513 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | ||
2514 | int i; | ||
2515 | |||
2516 | for(i=0; i<h; i++){ | ||
2517 | dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4]; | ||
2518 | dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4]; | ||
2519 | dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4]; | ||
2520 | dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4]; | ||
2521 | dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4]; | ||
2522 | dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4]; | ||
2523 | dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4]; | ||
2524 | dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4]; | ||
2525 | dst+=dstStride; | ||
2526 | src+=srcStride; | ||
2527 | } | ||
2528 | } | ||
2529 | |||
2530 | #if CONFIG_CAVS_DECODER | ||
2531 | /* AVS specific */ | ||
2532 | void ff_cavsdsp_init(DSPContext* c, AVCodecContext *avctx); | ||
2533 | |||
2534 | void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) { | ||
2535 | put_pixels8_c(dst, src, stride, 8); | ||
2536 | } | ||
2537 | void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) { | ||
2538 | avg_pixels8_c(dst, src, stride, 8); | ||
2539 | } | ||
2540 | void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) { | ||
2541 | put_pixels16_c(dst, src, stride, 16); | ||
2542 | } | ||
2543 | void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) { | ||
2544 | avg_pixels16_c(dst, src, stride, 16); | ||
2545 | } | ||
2546 | #endif /* CONFIG_CAVS_DECODER */ | ||
2547 | |||
2548 | #if CONFIG_VC1_DECODER || CONFIG_WMV3_DECODER | ||
2549 | /* VC-1 specific */ | ||
2550 | void ff_vc1dsp_init(DSPContext* c, AVCodecContext *avctx); | ||
2551 | |||
2552 | void ff_put_vc1_mspel_mc00_c(uint8_t *dst, uint8_t *src, int stride, int rnd) { | ||
2553 | put_pixels8_c(dst, src, stride, 8); | ||
2554 | } | ||
2555 | #endif /* CONFIG_VC1_DECODER||CONFIG_WMV3_DECODER */ | ||
2556 | |||
2557 | void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx); | ||
2558 | |||
2559 | /* H264 specific */ | ||
2560 | void ff_h264dspenc_init(DSPContext* c, AVCodecContext *avctx); | ||
2561 | |||
2562 | #if CONFIG_RV30_DECODER | ||
2563 | void ff_rv30dsp_init(DSPContext* c, AVCodecContext *avctx); | ||
2564 | #endif /* CONFIG_RV30_DECODER */ | ||
2565 | |||
2566 | #if CONFIG_RV40_DECODER | ||
2567 | static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){ | ||
2568 | put_pixels16_xy2_c(dst, src, stride, 16); | ||
2569 | } | ||
2570 | static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){ | ||
2571 | avg_pixels16_xy2_c(dst, src, stride, 16); | ||
2572 | } | ||
2573 | static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){ | ||
2574 | put_pixels8_xy2_c(dst, src, stride, 8); | ||
2575 | } | ||
2576 | static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){ | ||
2577 | avg_pixels8_xy2_c(dst, src, stride, 8); | ||
2578 | } | ||
2579 | |||
2580 | void ff_rv40dsp_init(DSPContext* c, AVCodecContext *avctx); | ||
2581 | #endif /* CONFIG_RV40_DECODER */ | ||
2582 | |||
/* WMV2 vertical half-pel interpolation: 4-tap (-1, 9, 9, -1)/16 filter run
   down each of 'w' columns, producing 8 output rows per column.  Reads one
   row above and two rows below the 8-row output window (src[-srcStride]
   through src[9*srcStride]); the caller must guarantee those are valid.
   Results are clamped to [0,255] via the ff_cropTbl lookup table. */
static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; /* clamp-to-byte lookup */
    int i;

    for(i=0; i<w; i++){
        /* src_1 is the row above the block; src0..src9 the rows below */
        const int src_1= src[ -srcStride];
        const int src0 = src[0 ];
        const int src1 = src[ srcStride];
        const int src2 = src[2*srcStride];
        const int src3 = src[3*srcStride];
        const int src4 = src[4*srcStride];
        const int src5 = src[5*srcStride];
        const int src6 = src[6*srcStride];
        const int src7 = src[7*srcStride];
        const int src8 = src[8*srcStride];
        const int src9 = src[9*srcStride];
        /* (9*(a+b) - (outer taps) + 8) >> 4 implements (-1,9,9,-1)/16
           with round-to-nearest */
        dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
        dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
        dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
        dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
        dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
        dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
        dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
        dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
        src++;
        dst++;
    }
}
2611 | |||
/* WMV2 8x8 sub-pel motion compensation.  The mcXY suffix encodes the
   half-pel phase: X = horizontal, Y = vertical (0 = full pel, 2 = half pel,
   1/3 = average of full-pel and half-pel, i.e. quarter-pel approximation). */

/* (0,0): plain full-pel copy. */
static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_c(dst, src, stride, 8);
}

/* (1,0): average of the source and the horizontal half-pel filter output. */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src, half, stride, stride, 8, 8);
}

/* (2,0): pure horizontal half-pel. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}

/* (3,0): average of the pixel to the right and the horizontal half-pel. */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8);
}

/* (0,2): pure vertical half-pel. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}

/* (1,2): average of the vertical half-pel and the separable H+V half-pel.
   halfH holds 11 filtered rows (8 wide) so the vertical pass has the extra
   top/bottom context it needs. */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
/* (3,2): as mc12 but the vertical half-pel is taken one column to the right. */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
/* (2,2): separable horizontal-then-vertical half-pel. */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}
2659 | |||
/* H.263 Annex J deblocking filter across a horizontal block edge.
   'src' points at the first row below the edge; for each of 8 columns the
   two pixels on either side of the edge (p1/p2) are corrected by d1 and the
   outer pixels (p0/p3) by d2, with strength taken from the per-qscale table.
   The whole body is compiled out when no H.263 variant is configured. */
static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_ANY_H263) {
    int x;
    const int strength= ff_h263_loop_filter_strength[qscale];

    for(x=0; x<8; x++){
        int d1, d2, ad1;
        int p0= src[x-2*stride];
        int p1= src[x-1*stride];
        int p2= src[x+0*stride];
        int p3= src[x+1*stride];
        int d = (p0 - p3 + 4*(p2 - p1)) / 8; /* edge gradient estimate */

        /* piecewise-linear "hat" response: full correction for small d,
           ramping back down to zero for large d (real edges are kept) */
        if (d<-2*strength) d1= 0;
        else if(d<- strength) d1=-2*strength - d;
        else if(d< strength) d1= d;
        else if(d< 2*strength) d1= 2*strength - d;
        else d1= 0;

        p1 += d1;
        p2 -= d1;
        /* branchless clamp to [0,255]: if bit 8 is set the value is out of
           range, and ~(p>>31) yields 0 for negative, 255 for overflow */
        if(p1&256) p1= ~(p1>>31);
        if(p2&256) p2= ~(p2>>31);

        src[x-1*stride] = p1;
        src[x+0*stride] = p2;

        ad1= FFABS(d1)>>1;

        /* secondary, weaker correction of the outer pixels */
        d2= av_clip((p0-p3)/4, -ad1, ad1);

        src[x-2*stride] = p0 - d2;
        src[x+ stride] = p3 + d2;
    }
    }
}

/* H.263 Annex J deblocking filter across a vertical block edge: identical
   math to h263_v_loop_filter_c, applied along rows instead of columns.
   'src' points at the first column right of the edge. */
static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_ANY_H263) {
    int y;
    const int strength= ff_h263_loop_filter_strength[qscale];

    for(y=0; y<8; y++){
        int d1, d2, ad1;
        int p0= src[y*stride-2];
        int p1= src[y*stride-1];
        int p2= src[y*stride+0];
        int p3= src[y*stride+1];
        int d = (p0 - p3 + 4*(p2 - p1)) / 8; /* edge gradient estimate */

        if (d<-2*strength) d1= 0;
        else if(d<- strength) d1=-2*strength - d;
        else if(d< strength) d1= d;
        else if(d< 2*strength) d1= 2*strength - d;
        else d1= 0;

        p1 += d1;
        p2 -= d1;
        /* branchless clamp to [0,255], see h263_v_loop_filter_c */
        if(p1&256) p1= ~(p1>>31);
        if(p2&256) p2= ~(p2>>31);

        src[y*stride-1] = p1;
        src[y*stride+0] = p2;

        ad1= FFABS(d1)>>1;

        d2= av_clip((p0-p3)/4, -ad1, ad1);

        src[y*stride-2] = p0 - d2;
        src[y*stride+1] = p3 + d2;
    }
    }
}
2733 | |||
/* H.261 in-loop filter: separable [1 2 1]/4 smoothing of an 8x8 block,
   except the outermost rows/columns, which are passed through unchanged.
   The vertical pass accumulates into a scaled int buffer (border rows are
   stored pre-scaled by 4 so the horizontal pass can treat all rows alike);
   the horizontal pass writes the rounded result back into src. */
static void h261_loop_filter_c(uint8_t *src, int stride){
    int tmp[64];
    int row, col;

    /* vertical pass: rows 0 and 7 copied (x4), rows 1..6 get [1 2 1] */
    for (col = 0; col < 8; col++) {
        tmp[col]      = 4 * src[col];
        tmp[56 + col] = 4 * src[7 * stride + col];
    }
    for (row = 1; row < 7; row++) {
        const uint8_t *line = src + row * stride;
        for (col = 0; col < 8; col++)
            tmp[row * 8 + col] =
                line[col - stride] + 2 * line[col] + line[col + stride];
    }

    /* horizontal pass: borders rescaled only, interior gets [1 2 1];
       total gain 16 is removed with round-to-nearest */
    for (row = 0; row < 8; row++) {
        uint8_t   *out = src + row * stride;
        const int *t   = tmp + row * 8;
        out[0] = (t[0] + 2) >> 2;
        out[7] = (t[7] + 2) >> 2;
        for (col = 1; col < 7; col++)
            out[col] = (t[col - 1] + 2 * t[col] + t[col + 1] + 8) >> 4;
    }
}
2760 | |||
/* H.264 normal (bS < 4) luma deblocking of one 16-sample edge.
   The edge is split into 4 groups of 4 samples; tc0[i] < 0 disables
   filtering for group i.  p0/q0 get a clipped delta; p1/q1 are additionally
   corrected when the inner samples are smooth enough (|p2-p0| / |q2-q0|
   below beta), each such correction widening the clip range tc by one. */
static inline void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
{
    int i, d;
    for( i = 0; i < 4; i++ ) {
        if( tc0[i] < 0 ) {
            pix += 4*ystride;
            continue;
        }
        for( d = 0; d < 4; d++ ) {
            const int p0 = pix[-1*xstride];
            const int p1 = pix[-2*xstride];
            const int p2 = pix[-3*xstride];
            const int q0 = pix[0];
            const int q1 = pix[1*xstride];
            const int q2 = pix[2*xstride];

            /* filter only across small steps that look like blocking
               artifacts, not across real image edges */
            if( FFABS( p0 - q0 ) < alpha &&
                FFABS( p1 - p0 ) < beta &&
                FFABS( q1 - q0 ) < beta ) {

                int tc = tc0[i];
                int i_delta;

                if( FFABS( p2 - p0 ) < beta ) {
                    /* p1' correction, clipped to +-tc0 */
                    pix[-2*xstride] = p1 + av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] );
                    tc++;
                }
                if( FFABS( q2 - q0 ) < beta ) {
                    /* q1' correction, clipped to +-tc0 */
                    pix[ xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] );
                    tc++;
                }

                i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
                pix[-xstride] = av_clip_uint8( p0 + i_delta ); /* p0' */
                pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
            }
            pix += ystride;
        }
    }
}
/* Vertical luma edge filter: samples along the edge are 1 apart. */
static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_luma_c(pix, stride, 1, alpha, beta, tc0);
}
/* Horizontal luma edge filter: samples across the edge are 1 apart. */
static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_luma_c(pix, 1, stride, alpha, beta, tc0);
}
2809 | |||
/* H.264 strong (bS == 4, intra) luma deblocking of one 16-sample edge.
   For very flat edges (|p0-q0| below alpha/4 + 2) up to three samples on
   each side are rewritten with long smoothing filters; otherwise only
   p0/q0 get the short 3-tap filter. */
static inline void h264_loop_filter_luma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
{
    int d;
    for( d = 0; d < 16; d++ ) {
        const int p2 = pix[-3*xstride];
        const int p1 = pix[-2*xstride];
        const int p0 = pix[-1*xstride];

        const int q0 = pix[ 0*xstride];
        const int q1 = pix[ 1*xstride];
        const int q2 = pix[ 2*xstride];

        if( FFABS( p0 - q0 ) < alpha &&
            FFABS( p1 - p0 ) < beta &&
            FFABS( q1 - q0 ) < beta ) {

            if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){ /* very flat edge */
                if( FFABS( p2 - p0 ) < beta)
                {
                    const int p3 = pix[-4*xstride];
                    /* p0', p1', p2' */
                    pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
                    pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
                    pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
                } else {
                    /* p0' */
                    pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
                }
                if( FFABS( q2 - q0 ) < beta)
                {
                    const int q3 = pix[3*xstride];
                    /* q0', q1', q2' */
                    pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
                    pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
                    pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
                } else {
                    /* q0' */
                    pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
                }
            }else{
                /* p0', q0' */
                pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
                pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
            }
        }
        pix += ystride;
    }
}
/* Vertical intra luma edge filter. */
static void h264_v_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_luma_intra_c(pix, stride, 1, alpha, beta);
}
/* Horizontal intra luma edge filter. */
static void h264_h_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_luma_intra_c(pix, 1, stride, alpha, beta);
}
2866 | |||
/* H.264 normal (bS < 4) chroma deblocking of one 8-sample edge: 4 groups of
   2 samples, group i skipped when tc0[i] <= 0.  Only p0/q0 are modified,
   by a delta clipped to +-tc. */
static inline void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
{
    int i, d;
    for( i = 0; i < 4; i++ ) {
        const int tc = tc0[i];
        if( tc <= 0 ) {
            pix += 2*ystride;
            continue;
        }
        for( d = 0; d < 2; d++ ) {
            const int p0 = pix[-1*xstride];
            const int p1 = pix[-2*xstride];
            const int q0 = pix[0];
            const int q1 = pix[1*xstride];

            /* skip real image edges, filter only blocking steps */
            if( FFABS( p0 - q0 ) < alpha &&
                FFABS( p1 - p0 ) < beta &&
                FFABS( q1 - q0 ) < beta ) {

                int delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );

                pix[-xstride] = av_clip_uint8( p0 + delta ); /* p0' */
                pix[0] = av_clip_uint8( q0 - delta ); /* q0' */
            }
            pix += ystride;
        }
    }
}
/* Vertical chroma edge filter. */
static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_chroma_c(pix, stride, 1, alpha, beta, tc0);
}
/* Horizontal chroma edge filter. */
static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_chroma_c(pix, 1, stride, alpha, beta, tc0);
}
2903 | |||
/* Absolute difference of two ints (local equivalent of FFABS(a - b)). */
static int h264_chroma_absdiff(int a, int b)
{
    int d = a - b;
    return d < 0 ? -d : d;
}

/* H.264 strong (bS == 4, intra) chroma deblocking of one 8-sample edge.
   Wherever the edge looks like a blocking artifact (|p0-q0| < alpha and the
   inner gradients below beta), p0 and q0 are replaced by 3-tap smoothed
   values; p1/q1 are never modified for chroma. */
static inline void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
{
    int pos;
    for (pos = 0; pos < 8; pos++, pix += ystride) {
        const int p1 = pix[-2*xstride];
        const int p0 = pix[-xstride];
        const int q0 = pix[0];
        const int q1 = pix[xstride];

        if (h264_chroma_absdiff(p0, q0) < alpha &&
            h264_chroma_absdiff(p1, p0) < beta &&
            h264_chroma_absdiff(q1, q0) < beta) {
            pix[-xstride] = (2*p1 + p0 + q1 + 2) >> 2; /* p0' */
            pix[0]        = (2*q1 + q0 + p1 + 2) >> 2; /* q0' */
        }
    }
}
/* Vertical intra chroma edge filter. */
static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_chroma_intra_c(pix, stride, 1, alpha, beta);
}
/* Horizontal intra chroma edge filter. */
static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_chroma_intra_c(pix, 1, stride, alpha, beta);
}
2931 | |||
/* Sum of absolute differences between two 16-pixel-wide blocks of height h.
   'v' (motion-estimation context) is unused in the C implementation. */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            total += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}
2959 | |||
/* SAD of a 16-wide block against a horizontally half-pel interpolated
   reference (rounded average of each pixel and its right neighbour). */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            total += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}
2987 | |||
/* SAD of a 16-wide block against a vertically half-pel interpolated
   reference (rounded average of each pixel and the one below it). */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *pix3 = pix2 + line_size; /* next reference row */
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            total += abs(pix1[col] - avg2(pix2[col], pix3[col]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return total;
}
3017 | |||
/* SAD of a 16-wide block against a diagonally half-pel interpolated
   reference (rounded average of the 2x2 neighbourhood). */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *pix3 = pix2 + line_size; /* next reference row */
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            total += abs(pix1[col] -
                         avg4(pix2[col], pix2[col + 1], pix3[col], pix3[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return total;
}
3047 | |||
/* Sum of absolute differences between two 8-pixel-wide blocks of height h.
   'v' (motion-estimation context) is unused in the C implementation. */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            total += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}
3067 | |||
/* 8-wide SAD against a horizontally half-pel interpolated reference. */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            total += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}
3087 | |||
/* 8-wide SAD against a vertically half-pel interpolated reference. */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *pix3 = pix2 + line_size; /* next reference row */
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            total += abs(pix1[col] - avg2(pix2[col], pix3[col]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return total;
}
3109 | |||
/* 8-wide SAD against a diagonally half-pel interpolated reference. */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *pix3 = pix2 + line_size; /* next reference row */
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            total += abs(pix1[col] -
                         avg4(pix2[col], pix2[col + 1], pix3[col], pix3[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return total;
}
3131 | |||
/* Noise-preserving SSE comparator for 16-wide blocks: plain SSE (score1)
   plus a penalty (score2) for differences in local gradient structure,
   weighted by avctx->nsse_weight (8 when no context is available). */
static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
    int score1=0;
    int score2=0;
    int x,y;

    for(y=0; y<h; y++){
        for(x=0; x<16; x++){
            score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
        }
        if(y+1<h){ /* gradient term needs the next row */
            for(x=0; x<15; x++){
                score2+= FFABS( s1[x ] - s1[x +stride]
                             - s1[x+1] + s1[x+1+stride])
                        -FFABS( s2[x ] - s2[x +stride]
                             - s2[x+1] + s2[x+1+stride]);
            }
        }
        s1+= stride;
        s2+= stride;
    }

    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else return score1 + FFABS(score2)*8;
}

/* Same as nsse16_c for 8-wide blocks. */
static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
    int score1=0;
    int score2=0;
    int x,y;

    for(y=0; y<h; y++){
        for(x=0; x<8; x++){
            score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
        }
        if(y+1<h){ /* gradient term needs the next row */
            for(x=0; x<7; x++){
                score2+= FFABS( s1[x ] - s1[x +stride]
                             - s1[x+1] + s1[x+1+stride])
                        -FFABS( s2[x ] - s2[x +stride]
                             - s2[x+1] + s2[x+1+stride]);
            }
        }
        s1+= stride;
        s2+= stride;
    }

    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else return score1 + FFABS(score2)*8;
}
3183 | |||
/* Rate-distortion trial for the trellis quantizer: returns the weighted
   squared error that would remain if 'scale' times the DCT basis vector
   were added to the residual 'rem'.  Values are in BASIS_SHIFT fixed point
   and rounded down to RECON_SHIFT before weighting. */
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
    int i;
    unsigned int sum=0;

    for(i=0; i<8*8; i++){
        int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
        int w= weight[i];
        b>>= RECON_SHIFT;
        assert(-512<b && b<512);

        sum += (w*b)*(w*b)>>4;
    }
    return sum>>2;
}

/* Commit the trial from try_8x8basis_c: add 'scale' times the basis vector
   to the residual, with the same fixed-point rounding. */
static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
    int i;

    for(i=0; i<8*8; i++){
        rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
    }
}
3206 | |||
/**
 * Permutes an 8x8 block of DCT coefficients in place.
 * @param block the block which will be permuted according to the given permutation vector
 * @param permutation the permutation vector (maps original index -> permuted index)
 * @param last the last non zero coefficient in scantable order, used to speed the permutation up
 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
 * (inverse) permuted to scantable order!
 */
void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
{
    int i;
    DCTELEM temp[64];

    if(last<=0) return;
    //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations

    /* move the (at most last+1) nonzero coefficients aside, zeroing their
       original slots so untouched positions end up zero after the copy back */
    for(i=0; i<=last; i++){
        const int j= scantable[i];
        temp[j]= block[j];
        block[j]=0;
    }

    /* write each saved coefficient to its permuted position */
    for(i=0; i<=last; i++){
        const int j= scantable[i];
        const int perm_j= permutation[j];
        block[perm_j]= temp[j];
    }
}
3235 | |||
/* Comparator that always reports a perfect match; used for FF_CMP_ZERO. */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    return 0;
}

/* Fill the 6-entry comparison-function array 'cmp' (one entry per block
   size class) from the DSPContext according to the FF_CMP_* selector in the
   low byte of 'type'.  Unknown selectors leave the entries NULL (cleared by
   the memset) and log an error. */
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
    int i;

    memset(cmp, 0, sizeof(void*)*6);

    for(i=0; i<6; i++){
        switch(type&0xFF){
        case FF_CMP_SAD:
            cmp[i]= c->sad[i];
            break;
        case FF_CMP_SATD:
            cmp[i]= c->hadamard8_diff[i];
            break;
        case FF_CMP_SSE:
            cmp[i]= c->sse[i];
            break;
        case FF_CMP_DCT:
            cmp[i]= c->dct_sad[i];
            break;
        case FF_CMP_DCT264:
            cmp[i]= c->dct264_sad[i];
            break;
        case FF_CMP_DCTMAX:
            cmp[i]= c->dct_max[i];
            break;
        case FF_CMP_PSNR:
            cmp[i]= c->quant_psnr[i];
            break;
        case FF_CMP_BIT:
            cmp[i]= c->bit[i];
            break;
        case FF_CMP_RD:
            cmp[i]= c->rd[i];
            break;
        case FF_CMP_VSAD:
            cmp[i]= c->vsad[i];
            break;
        case FF_CMP_VSSE:
            cmp[i]= c->vsse[i];
            break;
        case FF_CMP_ZERO:
            cmp[i]= zero_cmp;
            break;
        case FF_CMP_NSSE:
            cmp[i]= c->nsse[i];
            break;
#if CONFIG_SNOW_ENCODER
        case FF_CMP_W53:
            cmp[i]= c->w53[i];
            break;
        case FF_CMP_W97:
            cmp[i]= c->w97[i];
            break;
#endif
        default:
            av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
        }
    }
}
3299 | |||
/* Zero one 64-coefficient DCT block. */
static void clear_block_c(DCTELEM *block)
{
    memset(block, 0, sizeof(DCTELEM)*64);
}

/**
 * memset(blocks, 0, sizeof(DCTELEM)*6*64)
 * Zero a full macroblock worth of DCT blocks (4 luma + 2 chroma).
 */
static void clear_blocks_c(DCTELEM *blocks)
{
    memset(blocks, 0, sizeof(DCTELEM)*6*64);
}
3312 | |||
3313 | static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){ | ||
3314 | long i; | ||
3315 | for(i=0; i<=w-sizeof(long); i+=sizeof(long)){ | ||
3316 | long a = *(long*)(src+i); | ||
3317 | long b = *(long*)(dst+i); | ||
3318 | *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80); | ||
3319 | } | ||
3320 | for(; i<w; i++) | ||
3321 | dst[i+0] += src[i+0]; | ||
3322 | } | ||
3323 | |||
3324 | static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ | ||
3325 | long i; | ||
3326 | for(i=0; i<=w-sizeof(long); i+=sizeof(long)){ | ||
3327 | long a = *(long*)(src1+i); | ||
3328 | long b = *(long*)(src2+i); | ||
3329 | *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80); | ||
3330 | } | ||
3331 | for(; i<w; i++) | ||
3332 | dst[i] = src1[i]+src2[i]; | ||
3333 | } | ||
3334 | |||
3335 | static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ | ||
3336 | long i; | ||
3337 | #if !HAVE_FAST_UNALIGNED | ||
3338 | if((long)src2 & (sizeof(long)-1)){ | ||
3339 | for(i=0; i+7<w; i+=8){ | ||
3340 | dst[i+0] = src1[i+0]-src2[i+0]; | ||
3341 | dst[i+1] = src1[i+1]-src2[i+1]; | ||
3342 | dst[i+2] = src1[i+2]-src2[i+2]; | ||
3343 | dst[i+3] = src1[i+3]-src2[i+3]; | ||
3344 | dst[i+4] = src1[i+4]-src2[i+4]; | ||
3345 | dst[i+5] = src1[i+5]-src2[i+5]; | ||
3346 | dst[i+6] = src1[i+6]-src2[i+6]; | ||
3347 | dst[i+7] = src1[i+7]-src2[i+7]; | ||
3348 | } | ||
3349 | }else | ||
3350 | #endif | ||
3351 | for(i=0; i<=w-sizeof(long); i+=sizeof(long)){ | ||
3352 | long a = *(long*)(src1+i); | ||
3353 | long b = *(long*)(src2+i); | ||
3354 | *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80); | ||
3355 | } | ||
3356 | for(; i<w; i++) | ||
3357 | dst[i+0] = src1[i+0]-src2[i+0]; | ||
3358 | } | ||
3359 | |||
/* HuffYUV decode step: reconstruct a row by adding the stored residuals
   'diff' to the median predictor med(left, above, left+above-aboveleft).
   left/left_top carry the running predictor state across calls. */
static void add_hfyu_median_prediction_c(uint8_t *dst, uint8_t *src1, uint8_t *diff, int w, int *left, int *left_top){
    int i;
    uint8_t l, lt;

    l= *left;
    lt= *left_top;

    for(i=0; i<w; i++){
        /* src1 is the row above; (l + above - lt) & 0xFF is the gradient
           predictor, mid_pred picks the median of the three candidates */
        l= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
        lt= src1[i];
        dst[i]= l;
    }

    *left= l;
    *left_top= lt;
}

/* HuffYUV encode step: inverse of add_hfyu_median_prediction_c, writing the
   residual between the actual row 'src2' and the median prediction. */
static void sub_hfyu_median_prediction_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
    int i;
    uint8_t l, lt;

    l= *left;
    lt= *left_top;

    for(i=0; i<w; i++){
        const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
        lt= src1[i];
        l= src2[i];
        dst[i]= l - pred;
    }

    *left= l;
    *left_top= lt;
}
3394 | |||
/* Butterfly: write sum and difference of i1,i2 into distinct outputs. */
#define BUTTERFLY2(o1,o2,i1,i2) \
    o1= (i1)+(i2);\
    o2= (i1)-(i2);

/* In-place butterfly: (x, y) <- (x+y, x-y). */
#define BUTTERFLY1(x,y) \
{\
    int a,b;\
    a= x;\
    b= y;\
    x= a+b;\
    y= a-b;\
}

/* Final butterfly stage folded into the accumulation: |x+y| + |x-y|. */
#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
3409 | |||
/* SATD comparator: 2-D 8x8 Hadamard transform of the difference between
   'src' and 'dst' blocks, returning the sum of absolute transform
   coefficients.  Rows are transformed first into temp[], then columns,
   with the last column butterfly folded into the accumulation. */
static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8); /* only the 8x8 case is implemented */

    for(i=0; i<8; i++){
        //FIXME try pointer walks
        /* stage 1: butterflies over pairs of pixel differences */
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);

        /* stages 2 and 3: complete the length-8 row transform */
        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        /* column transform; last stage accumulates |.|+|.| directly */
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }
#if 0
static int maxi=0;
if(sum>maxi){
    maxi=sum;
    printf("MAX:%d\n", maxi);
}
#endif
    return sum;
}
3461 | |||
3462 | static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){ | ||
3463 | int i; | ||
3464 | int temp[64]; | ||
3465 | int sum=0; | ||
3466 | |||
3467 | assert(h==8); | ||
3468 | |||
3469 | for(i=0; i<8; i++){ | ||
3470 | //FIXME try pointer walks | ||
3471 | BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]); | ||
3472 | BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]); | ||
3473 | BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]); | ||
3474 | BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]); | ||
3475 | |||
3476 | BUTTERFLY1(temp[8*i+0], temp[8*i+2]); | ||
3477 | BUTTERFLY1(temp[8*i+1], temp[8*i+3]); | ||
3478 | BUTTERFLY1(temp[8*i+4], temp[8*i+6]); | ||
3479 | BUTTERFLY1(temp[8*i+5], temp[8*i+7]); | ||
3480 | |||
3481 | BUTTERFLY1(temp[8*i+0], temp[8*i+4]); | ||
3482 | BUTTERFLY1(temp[8*i+1], temp[8*i+5]); | ||
3483 | BUTTERFLY1(temp[8*i+2], temp[8*i+6]); | ||
3484 | BUTTERFLY1(temp[8*i+3], temp[8*i+7]); | ||
3485 | } | ||
3486 | |||
3487 | for(i=0; i<8; i++){ | ||
3488 | BUTTERFLY1(temp[8*0+i], temp[8*1+i]); | ||
3489 | BUTTERFLY1(temp[8*2+i], temp[8*3+i]); | ||
3490 | BUTTERFLY1(temp[8*4+i], temp[8*5+i]); | ||
3491 | BUTTERFLY1(temp[8*6+i], temp[8*7+i]); | ||
3492 | |||
3493 | BUTTERFLY1(temp[8*0+i], temp[8*2+i]); | ||
3494 | BUTTERFLY1(temp[8*1+i], temp[8*3+i]); | ||
3495 | BUTTERFLY1(temp[8*4+i], temp[8*6+i]); | ||
3496 | BUTTERFLY1(temp[8*5+i], temp[8*7+i]); | ||
3497 | |||
3498 | sum += | ||
3499 | BUTTERFLYA(temp[8*0+i], temp[8*4+i]) | ||
3500 | +BUTTERFLYA(temp[8*1+i], temp[8*5+i]) | ||
3501 | +BUTTERFLYA(temp[8*2+i], temp[8*6+i]) | ||
3502 | +BUTTERFLYA(temp[8*3+i], temp[8*7+i]); | ||
3503 | } | ||
3504 | |||
3505 | sum -= FFABS(temp[8*0] + temp[8*4]); // -mean | ||
3506 | |||
3507 | return sum; | ||
3508 | } | ||
3509 | |||
3510 | static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ | ||
3511 | MpegEncContext * const s= (MpegEncContext *)c; | ||
3512 | DECLARE_ALIGNED_16(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]); | ||
3513 | DCTELEM * const temp= (DCTELEM*)aligned_temp; | ||
3514 | |||
3515 | assert(h==8); | ||
3516 | |||
3517 | s->dsp.diff_pixels(temp, src1, src2, stride); | ||
3518 | s->dsp.fdct(temp); | ||
3519 | return s->dsp.sum_abs_dctelem(temp); | ||
3520 | } | ||
3521 | |||
3522 | #if CONFIG_GPL | ||
3523 | #define DCT8_1D {\ | ||
3524 | const int s07 = SRC(0) + SRC(7);\ | ||
3525 | const int s16 = SRC(1) + SRC(6);\ | ||
3526 | const int s25 = SRC(2) + SRC(5);\ | ||
3527 | const int s34 = SRC(3) + SRC(4);\ | ||
3528 | const int a0 = s07 + s34;\ | ||
3529 | const int a1 = s16 + s25;\ | ||
3530 | const int a2 = s07 - s34;\ | ||
3531 | const int a3 = s16 - s25;\ | ||
3532 | const int d07 = SRC(0) - SRC(7);\ | ||
3533 | const int d16 = SRC(1) - SRC(6);\ | ||
3534 | const int d25 = SRC(2) - SRC(5);\ | ||
3535 | const int d34 = SRC(3) - SRC(4);\ | ||
3536 | const int a4 = d16 + d25 + (d07 + (d07>>1));\ | ||
3537 | const int a5 = d07 - d34 - (d25 + (d25>>1));\ | ||
3538 | const int a6 = d07 + d34 - (d16 + (d16>>1));\ | ||
3539 | const int a7 = d16 - d25 + (d34 + (d34>>1));\ | ||
3540 | DST(0, a0 + a1 ) ;\ | ||
3541 | DST(1, a4 + (a7>>2)) ;\ | ||
3542 | DST(2, a2 + (a3>>1)) ;\ | ||
3543 | DST(3, a5 + (a6>>2)) ;\ | ||
3544 | DST(4, a0 - a1 ) ;\ | ||
3545 | DST(5, a6 - (a5>>2)) ;\ | ||
3546 | DST(6, (a2>>1) - a3 ) ;\ | ||
3547 | DST(7, (a4>>2) - a7 ) ;\ | ||
3548 | } | ||
3549 | |||
3550 | static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ | ||
3551 | MpegEncContext * const s= (MpegEncContext *)c; | ||
3552 | DCTELEM dct[8][8]; | ||
3553 | int i; | ||
3554 | int sum=0; | ||
3555 | |||
3556 | s->dsp.diff_pixels(dct[0], src1, src2, stride); | ||
3557 | |||
3558 | #define SRC(x) dct[i][x] | ||
3559 | #define DST(x,v) dct[i][x]= v | ||
3560 | for( i = 0; i < 8; i++ ) | ||
3561 | DCT8_1D | ||
3562 | #undef SRC | ||
3563 | #undef DST | ||
3564 | |||
3565 | #define SRC(x) dct[x][i] | ||
3566 | #define DST(x,v) sum += FFABS(v) | ||
3567 | for( i = 0; i < 8; i++ ) | ||
3568 | DCT8_1D | ||
3569 | #undef SRC | ||
3570 | #undef DST | ||
3571 | return sum; | ||
3572 | } | ||
3573 | #endif | ||
3574 | |||
3575 | static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ | ||
3576 | MpegEncContext * const s= (MpegEncContext *)c; | ||
3577 | DECLARE_ALIGNED_8(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]); | ||
3578 | DCTELEM * const temp= (DCTELEM*)aligned_temp; | ||
3579 | int sum=0, i; | ||
3580 | |||
3581 | assert(h==8); | ||
3582 | |||
3583 | s->dsp.diff_pixels(temp, src1, src2, stride); | ||
3584 | s->dsp.fdct(temp); | ||
3585 | |||
3586 | for(i=0; i<64; i++) | ||
3587 | sum= FFMAX(sum, FFABS(temp[i])); | ||
3588 | |||
3589 | return sum; | ||
3590 | } | ||
3591 | |||
3592 | static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ | ||
3593 | MpegEncContext * const s= (MpegEncContext *)c; | ||
3594 | DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64*2/8]); | ||
3595 | DCTELEM * const temp= (DCTELEM*)aligned_temp; | ||
3596 | DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64; | ||
3597 | int sum=0, i; | ||
3598 | |||
3599 | assert(h==8); | ||
3600 | s->mb_intra=0; | ||
3601 | |||
3602 | s->dsp.diff_pixels(temp, src1, src2, stride); | ||
3603 | |||
3604 | memcpy(bak, temp, 64*sizeof(DCTELEM)); | ||
3605 | |||
3606 | s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); | ||
3607 | s->dct_unquantize_inter(s, temp, 0, s->qscale); | ||
3608 | ff_simple_idct(temp); //FIXME | ||
3609 | |||
3610 | for(i=0; i<64; i++) | ||
3611 | sum+= (temp[i]-bak[i])*(temp[i]-bak[i]); | ||
3612 | |||
3613 | return sum; | ||
3614 | } | ||
3615 | |||
3616 | static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ | ||
3617 | MpegEncContext * const s= (MpegEncContext *)c; | ||
3618 | const uint8_t *scantable= s->intra_scantable.permutated; | ||
3619 | DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]); | ||
3620 | DECLARE_ALIGNED_8 (uint64_t, aligned_bak[stride]); | ||
3621 | DCTELEM * const temp= (DCTELEM*)aligned_temp; | ||
3622 | uint8_t * const bak= (uint8_t*)aligned_bak; | ||
3623 | int i, last, run, bits, level, distortion, start_i; | ||
3624 | const int esc_length= s->ac_esc_length; | ||
3625 | uint8_t * length; | ||
3626 | uint8_t * last_length; | ||
3627 | |||
3628 | assert(h==8); | ||
3629 | |||
3630 | for(i=0; i<8; i++){ | ||
3631 | ((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0]; | ||
3632 | ((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1]; | ||
3633 | } | ||
3634 | |||
3635 | s->dsp.diff_pixels(temp, src1, src2, stride); | ||
3636 | |||
3637 | s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); | ||
3638 | |||
3639 | bits=0; | ||
3640 | |||
3641 | if (s->mb_intra) { | ||
3642 | start_i = 1; | ||
3643 | length = s->intra_ac_vlc_length; | ||
3644 | last_length= s->intra_ac_vlc_last_length; | ||
3645 | bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma | ||
3646 | } else { | ||
3647 | start_i = 0; | ||
3648 | length = s->inter_ac_vlc_length; | ||
3649 | last_length= s->inter_ac_vlc_last_length; | ||
3650 | } | ||
3651 | |||
3652 | if(last>=start_i){ | ||
3653 | run=0; | ||
3654 | for(i=start_i; i<last; i++){ | ||
3655 | int j= scantable[i]; | ||
3656 | level= temp[j]; | ||
3657 | |||
3658 | if(level){ | ||
3659 | level+=64; | ||
3660 | if((level&(~127)) == 0){ | ||
3661 | bits+= length[UNI_AC_ENC_INDEX(run, level)]; | ||
3662 | }else | ||
3663 | bits+= esc_length; | ||
3664 | run=0; | ||
3665 | }else | ||
3666 | run++; | ||
3667 | } | ||
3668 | i= scantable[last]; | ||
3669 | |||
3670 | level= temp[i] + 64; | ||
3671 | |||
3672 | assert(level - 64); | ||
3673 | |||
3674 | if((level&(~127)) == 0){ | ||
3675 | bits+= last_length[UNI_AC_ENC_INDEX(run, level)]; | ||
3676 | }else | ||
3677 | bits+= esc_length; | ||
3678 | |||
3679 | } | ||
3680 | |||
3681 | if(last>=0){ | ||
3682 | if(s->mb_intra) | ||
3683 | s->dct_unquantize_intra(s, temp, 0, s->qscale); | ||
3684 | else | ||
3685 | s->dct_unquantize_inter(s, temp, 0, s->qscale); | ||
3686 | } | ||
3687 | |||
3688 | s->dsp.idct_add(bak, stride, temp); | ||
3689 | |||
3690 | distortion= s->dsp.sse[1](NULL, bak, src1, stride, 8); | ||
3691 | |||
3692 | return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7); | ||
3693 | } | ||
3694 | |||
3695 | static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ | ||
3696 | MpegEncContext * const s= (MpegEncContext *)c; | ||
3697 | const uint8_t *scantable= s->intra_scantable.permutated; | ||
3698 | DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]); | ||
3699 | DCTELEM * const temp= (DCTELEM*)aligned_temp; | ||
3700 | int i, last, run, bits, level, start_i; | ||
3701 | const int esc_length= s->ac_esc_length; | ||
3702 | uint8_t * length; | ||
3703 | uint8_t * last_length; | ||
3704 | |||
3705 | assert(h==8); | ||
3706 | |||
3707 | s->dsp.diff_pixels(temp, src1, src2, stride); | ||
3708 | |||
3709 | s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); | ||
3710 | |||
3711 | bits=0; | ||
3712 | |||
3713 | if (s->mb_intra) { | ||
3714 | start_i = 1; | ||
3715 | length = s->intra_ac_vlc_length; | ||
3716 | last_length= s->intra_ac_vlc_last_length; | ||
3717 | bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma | ||
3718 | } else { | ||
3719 | start_i = 0; | ||
3720 | length = s->inter_ac_vlc_length; | ||
3721 | last_length= s->inter_ac_vlc_last_length; | ||
3722 | } | ||
3723 | |||
3724 | if(last>=start_i){ | ||
3725 | run=0; | ||
3726 | for(i=start_i; i<last; i++){ | ||
3727 | int j= scantable[i]; | ||
3728 | level= temp[j]; | ||
3729 | |||
3730 | if(level){ | ||
3731 | level+=64; | ||
3732 | if((level&(~127)) == 0){ | ||
3733 | bits+= length[UNI_AC_ENC_INDEX(run, level)]; | ||
3734 | }else | ||
3735 | bits+= esc_length; | ||
3736 | run=0; | ||
3737 | }else | ||
3738 | run++; | ||
3739 | } | ||
3740 | i= scantable[last]; | ||
3741 | |||
3742 | level= temp[i] + 64; | ||
3743 | |||
3744 | assert(level - 64); | ||
3745 | |||
3746 | if((level&(~127)) == 0){ | ||
3747 | bits+= last_length[UNI_AC_ENC_INDEX(run, level)]; | ||
3748 | }else | ||
3749 | bits+= esc_length; | ||
3750 | } | ||
3751 | |||
3752 | return bits; | ||
3753 | } | ||
3754 | |||
3755 | #define VSAD_INTRA(size) \ | ||
3756 | static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \ | ||
3757 | int score=0; \ | ||
3758 | int x,y; \ | ||
3759 | \ | ||
3760 | for(y=1; y<h; y++){ \ | ||
3761 | for(x=0; x<size; x+=4){ \ | ||
3762 | score+= FFABS(s[x ] - s[x +stride]) + FFABS(s[x+1] - s[x+1+stride]) \ | ||
3763 | +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]); \ | ||
3764 | } \ | ||
3765 | s+= stride; \ | ||
3766 | } \ | ||
3767 | \ | ||
3768 | return score; \ | ||
3769 | } | ||
3770 | VSAD_INTRA(8) | ||
3771 | VSAD_INTRA(16) | ||
3772 | |||
3773 | static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){ | ||
3774 | int score=0; | ||
3775 | int x,y; | ||
3776 | |||
3777 | for(y=1; y<h; y++){ | ||
3778 | for(x=0; x<16; x++){ | ||
3779 | score+= FFABS(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]); | ||
3780 | } | ||
3781 | s1+= stride; | ||
3782 | s2+= stride; | ||
3783 | } | ||
3784 | |||
3785 | return score; | ||
3786 | } | ||
3787 | |||
3788 | #define SQ(a) ((a)*(a)) | ||
3789 | #define VSSE_INTRA(size) \ | ||
3790 | static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \ | ||
3791 | int score=0; \ | ||
3792 | int x,y; \ | ||
3793 | \ | ||
3794 | for(y=1; y<h; y++){ \ | ||
3795 | for(x=0; x<size; x+=4){ \ | ||
3796 | score+= SQ(s[x ] - s[x +stride]) + SQ(s[x+1] - s[x+1+stride]) \ | ||
3797 | +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]); \ | ||
3798 | } \ | ||
3799 | s+= stride; \ | ||
3800 | } \ | ||
3801 | \ | ||
3802 | return score; \ | ||
3803 | } | ||
3804 | VSSE_INTRA(8) | ||
3805 | VSSE_INTRA(16) | ||
3806 | |||
3807 | static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){ | ||
3808 | int score=0; | ||
3809 | int x,y; | ||
3810 | |||
3811 | for(y=1; y<h; y++){ | ||
3812 | for(x=0; x<16; x++){ | ||
3813 | score+= SQ(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]); | ||
3814 | } | ||
3815 | s1+= stride; | ||
3816 | s2+= stride; | ||
3817 | } | ||
3818 | |||
3819 | return score; | ||
3820 | } | ||
3821 | |||
3822 | static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2, | ||
3823 | int size){ | ||
3824 | int score=0; | ||
3825 | int i; | ||
3826 | for(i=0; i<size; i++) | ||
3827 | score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]); | ||
3828 | return score; | ||
3829 | } | ||
3830 | |||
3831 | WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c) | ||
3832 | WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c) | ||
3833 | WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c) | ||
3834 | #if CONFIG_GPL | ||
3835 | WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c) | ||
3836 | #endif | ||
3837 | WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c) | ||
3838 | WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c) | ||
3839 | WRAPPER8_16_SQ(rd8x8_c, rd16_c) | ||
3840 | WRAPPER8_16_SQ(bit8x8_c, bit16_c) | ||
3841 | #endif | ||
3842 | static void vector_fmul_c(float *dst, const float *src, int len){ | ||
3843 | int i; | ||
3844 | for(i=0; i<len; i++) | ||
3845 | dst[i] *= src[i]; | ||
3846 | } | ||
3847 | |||
3848 | static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){ | ||
3849 | int i; | ||
3850 | src1 += len-1; | ||
3851 | for(i=0; i<len; i++) | ||
3852 | dst[i] = src0[i] * src1[-i]; | ||
3853 | } | ||
3854 | |||
3855 | void ff_vector_fmul_add_add_c(float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step){ | ||
3856 | int i; | ||
3857 | for(i=0; i<len; i++) | ||
3858 | dst[i*step] = src0[i] * src1[i] + src2[i] + src3; | ||
3859 | } | ||
3860 | |||
3861 | void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len){ | ||
3862 | int i,j; | ||
3863 | dst += len; | ||
3864 | win += len; | ||
3865 | src0+= len; | ||
3866 | for(i=-len, j=len-1; i<0; i++, j--) { | ||
3867 | float s0 = src0[i]; | ||
3868 | float s1 = src1[j]; | ||
3869 | float wi = win[i]; | ||
3870 | float wj = win[j]; | ||
3871 | dst[i] = s0*wj - s1*wi + add_bias; | ||
3872 | dst[j] = s0*wi + s1*wj + add_bias; | ||
3873 | } | ||
3874 | } | ||
3875 | #if 0 | ||
3876 | static void int32_to_float_fmul_scalar_c(float *dst, const int *src, float mul, int len){ | ||
3877 | int i; | ||
3878 | for(i=0; i<len; i++) | ||
3879 | dst[i] = src[i] * mul; | ||
3880 | } | ||
3881 | |||
3882 | static av_always_inline int float_to_int16_one(const float *src){ | ||
3883 | int_fast32_t tmp = *(const int32_t*)src; | ||
3884 | if(tmp & 0xf0000){ | ||
3885 | tmp = (0x43c0ffff - tmp)>>31; | ||
3886 | // is this faster on some gcc/cpu combinations? | ||
3887 | // if(tmp > 0x43c0ffff) tmp = 0xFFFF; | ||
3888 | // else tmp = 0; | ||
3889 | } | ||
3890 | return tmp - 0x8000; | ||
3891 | } | ||
3892 | |||
3893 | void ff_float_to_int16_c(int16_t *dst, const float *src, long len){ | ||
3894 | int i; | ||
3895 | for(i=0; i<len; i++) | ||
3896 | dst[i] = float_to_int16_one(src+i); | ||
3897 | } | ||
3898 | |||
3899 | void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, int channels){ | ||
3900 | int i,j,c; | ||
3901 | if(channels==2){ | ||
3902 | for(i=0; i<len; i++){ | ||
3903 | dst[2*i] = float_to_int16_one(src[0]+i); | ||
3904 | dst[2*i+1] = float_to_int16_one(src[1]+i); | ||
3905 | } | ||
3906 | }else{ | ||
3907 | for(c=0; c<channels; c++) | ||
3908 | for(i=0, j=c; i<len; i++, j+=channels) | ||
3909 | dst[j] = float_to_int16_one(src[c]+i); | ||
3910 | } | ||
3911 | } | ||
3912 | |||
3913 | static void add_int16_c(int16_t * v1, int16_t * v2, int order) | ||
3914 | { | ||
3915 | while (order--) | ||
3916 | *v1++ += *v2++; | ||
3917 | } | ||
3918 | |||
3919 | static void sub_int16_c(int16_t * v1, int16_t * v2, int order) | ||
3920 | { | ||
3921 | while (order--) | ||
3922 | *v1++ -= *v2++; | ||
3923 | } | ||
3924 | |||
3925 | static int32_t scalarproduct_int16_c(int16_t * v1, int16_t * v2, int order, int shift) | ||
3926 | { | ||
3927 | int res = 0; | ||
3928 | |||
3929 | while (order--) | ||
3930 | res += (*v1++ * *v2++) >> shift; | ||
3931 | |||
3932 | return res; | ||
3933 | } | ||
3934 | |||
3935 | #define W0 2048 | ||
3936 | #define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */ | ||
3937 | #define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */ | ||
3938 | #define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */ | ||
3939 | #define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */ | ||
3940 | #define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */ | ||
3941 | #define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */ | ||
3942 | #define W7 565 /* 2048*sqrt (2)*cos (7*pi/16) */ | ||
3943 | |||
3944 | static void wmv2_idct_row(short * b) | ||
3945 | { | ||
3946 | int s1,s2; | ||
3947 | int a0,a1,a2,a3,a4,a5,a6,a7; | ||
3948 | /*step 1*/ | ||
3949 | a1 = W1*b[1]+W7*b[7]; | ||
3950 | a7 = W7*b[1]-W1*b[7]; | ||
3951 | a5 = W5*b[5]+W3*b[3]; | ||
3952 | a3 = W3*b[5]-W5*b[3]; | ||
3953 | a2 = W2*b[2]+W6*b[6]; | ||
3954 | a6 = W6*b[2]-W2*b[6]; | ||
3955 | a0 = W0*b[0]+W0*b[4]; | ||
3956 | a4 = W0*b[0]-W0*b[4]; | ||
3957 | /*step 2*/ | ||
3958 | s1 = (181*(a1-a5+a7-a3)+128)>>8;//1,3,5,7, | ||
3959 | s2 = (181*(a1-a5-a7+a3)+128)>>8; | ||
3960 | /*step 3*/ | ||
3961 | b[0] = (a0+a2+a1+a5 + (1<<7))>>8; | ||
3962 | b[1] = (a4+a6 +s1 + (1<<7))>>8; | ||
3963 | b[2] = (a4-a6 +s2 + (1<<7))>>8; | ||
3964 | b[3] = (a0-a2+a7+a3 + (1<<7))>>8; | ||
3965 | b[4] = (a0-a2-a7-a3 + (1<<7))>>8; | ||
3966 | b[5] = (a4-a6 -s2 + (1<<7))>>8; | ||
3967 | b[6] = (a4+a6 -s1 + (1<<7))>>8; | ||
3968 | b[7] = (a0+a2-a1-a5 + (1<<7))>>8; | ||
3969 | } | ||
3970 | static void wmv2_idct_col(short * b) | ||
3971 | { | ||
3972 | int s1,s2; | ||
3973 | int a0,a1,a2,a3,a4,a5,a6,a7; | ||
3974 | /*step 1, with extended precision*/ | ||
3975 | a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3; | ||
3976 | a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3; | ||
3977 | a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3; | ||
3978 | a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3; | ||
3979 | a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3; | ||
3980 | a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3; | ||
3981 | a0 = (W0*b[8*0]+W0*b[8*4] )>>3; | ||
3982 | a4 = (W0*b[8*0]-W0*b[8*4] )>>3; | ||
3983 | /*step 2*/ | ||
3984 | s1 = (181*(a1-a5+a7-a3)+128)>>8; | ||
3985 | s2 = (181*(a1-a5-a7+a3)+128)>>8; | ||
3986 | /*step 3*/ | ||
3987 | b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14; | ||
3988 | b[8*1] = (a4+a6 +s1 + (1<<13))>>14; | ||
3989 | b[8*2] = (a4-a6 +s2 + (1<<13))>>14; | ||
3990 | b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14; | ||
3991 | |||
3992 | b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14; | ||
3993 | b[8*5] = (a4-a6 -s2 + (1<<13))>>14; | ||
3994 | b[8*6] = (a4+a6 -s1 + (1<<13))>>14; | ||
3995 | b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14; | ||
3996 | } | ||
3997 | void ff_wmv2_idct_c(short * block){ | ||
3998 | int i; | ||
3999 | |||
4000 | for(i=0;i<64;i+=8){ | ||
4001 | wmv2_idct_row(block+i); | ||
4002 | } | ||
4003 | for(i=0;i<8;i++){ | ||
4004 | wmv2_idct_col(block+i); | ||
4005 | } | ||
4006 | } | ||
4007 | /* XXX: those functions should be suppressed ASAP when all IDCTs are | ||
4008 | converted */ | ||
4009 | static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block) | ||
4010 | { | ||
4011 | ff_wmv2_idct_c(block); | ||
4012 | put_pixels_clamped_c(block, dest, line_size); | ||
4013 | } | ||
4014 | static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block) | ||
4015 | { | ||
4016 | ff_wmv2_idct_c(block); | ||
4017 | add_pixels_clamped_c(block, dest, line_size); | ||
4018 | } | ||
4019 | static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block) | ||
4020 | { | ||
4021 | j_rev_dct (block); | ||
4022 | put_pixels_clamped_c(block, dest, line_size); | ||
4023 | } | ||
4024 | static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block) | ||
4025 | { | ||
4026 | j_rev_dct (block); | ||
4027 | add_pixels_clamped_c(block, dest, line_size); | ||
4028 | } | ||
4029 | |||
4030 | static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block) | ||
4031 | { | ||
4032 | j_rev_dct4 (block); | ||
4033 | put_pixels_clamped4_c(block, dest, line_size); | ||
4034 | } | ||
4035 | static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block) | ||
4036 | { | ||
4037 | j_rev_dct4 (block); | ||
4038 | add_pixels_clamped4_c(block, dest, line_size); | ||
4039 | } | ||
4040 | |||
4041 | static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block) | ||
4042 | { | ||
4043 | j_rev_dct2 (block); | ||
4044 | put_pixels_clamped2_c(block, dest, line_size); | ||
4045 | } | ||
4046 | static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block) | ||
4047 | { | ||
4048 | j_rev_dct2 (block); | ||
4049 | add_pixels_clamped2_c(block, dest, line_size); | ||
4050 | } | ||
4051 | |||
4052 | static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block) | ||
4053 | { | ||
4054 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | ||
4055 | |||
4056 | dest[0] = cm[(block[0] + 4)>>3]; | ||
4057 | } | ||
4058 | static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block) | ||
4059 | { | ||
4060 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | ||
4061 | |||
4062 | dest[0] = cm[dest[0] + ((block[0] + 4)>>3)]; | ||
4063 | } | ||
4064 | |||
4065 | static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; } | ||
4066 | #endif | ||
4067 | /* init static data */ | ||
4068 | void dsputil_static_init(void) | ||
4069 | { | ||
4070 | int i; | ||
4071 | |||
4072 | for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i; | ||
4073 | for(i=0;i<MAX_NEG_CROP;i++) { | ||
4074 | ff_cropTbl[i] = 0; | ||
4075 | ff_cropTbl[i + MAX_NEG_CROP + 256] = 255; | ||
4076 | } | ||
4077 | |||
4078 | for(i=0;i<512;i++) { | ||
4079 | ff_squareTbl[i] = (i - 256) * (i - 256); | ||
4080 | } | ||
4081 | |||
4082 | for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1; | ||
4083 | } | ||
4084 | |||
4085 | int ff_check_alignment(void){ | ||
4086 | static int did_fail=0; | ||
4087 | DECLARE_ALIGNED_16(int, aligned); | ||
4088 | |||
4089 | if((long)&aligned & 15){ | ||
4090 | if(!did_fail){ | ||
4091 | #if HAVE_MMX || HAVE_ALTIVEC | ||
4092 | av_log(NULL, AV_LOG_ERROR, | ||
4093 | "Compiler did not align stack variables. Libavcodec has been miscompiled\n" | ||
4094 | "and may be very slow or crash. This is not a bug in libavcodec,\n" | ||
4095 | "but in the compiler. You may try recompiling using gcc >= 4.2.\n" | ||
4096 | "Do not report crashes to FFmpeg developers.\n"); | ||
4097 | #endif | ||
4098 | did_fail=1; | ||
4099 | } | ||
4100 | return -1; | ||
4101 | } | ||
4102 | return 0; | ||
4103 | } | ||
4104 | |||
4105 | void dsputil_init(DSPContext* c) | ||
4106 | { | ||
4107 | ff_check_alignment(); | ||
4108 | |||
4109 | c->vector_fmul = vector_fmul_c; | ||
4110 | c->vector_fmul_reverse = vector_fmul_reverse_c; | ||
4111 | c->vector_fmul_add_add = ff_vector_fmul_add_add_c; | ||
4112 | c->vector_fmul_window = ff_vector_fmul_window_c; | ||
4113 | } | ||
4114 | |||
diff --git a/apps/codecs/libatrac/dsputil.h b/apps/codecs/libatrac/dsputil.h deleted file mode 100644 index 3bb0ff77a5..0000000000 --- a/apps/codecs/libatrac/dsputil.h +++ /dev/null | |||
@@ -1,898 +0,0 @@ | |||
1 | /* | ||
2 | * DSP utils | ||
3 | * Copyright (c) 2000, 2001, 2002 Fabrice Bellard | ||
4 | * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | ||
5 | * | ||
6 | * This file is part of FFmpeg. | ||
7 | * | ||
8 | * FFmpeg is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU Lesser General Public | ||
10 | * License as published by the Free Software Foundation; either | ||
11 | * version 2.1 of the License, or (at your option) any later version. | ||
12 | * | ||
13 | * FFmpeg is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
16 | * Lesser General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU Lesser General Public | ||
19 | * License along with FFmpeg; if not, write to the Free Software | ||
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
21 | */ | ||
22 | |||
23 | /** | ||
24 | * @file libavcodec/dsputil.h | ||
25 | * DSP utils. | ||
26 | * note, many functions in here may use MMX which trashes the FPU state, it is | ||
27 | * absolutely necessary to call emms_c() between dsp & float/double code | ||
28 | */ | ||
29 | |||
30 | #ifndef AVCODEC_DSPUTIL_H | ||
31 | #define AVCODEC_DSPUTIL_H | ||
32 | |||
33 | #include "libavutil/intreadwrite.h" | ||
34 | #include "avcodec.h" | ||
35 | |||
36 | |||
37 | //#define DEBUG | ||
38 | /* dct code */ | ||
39 | typedef short DCTELEM; | ||
40 | typedef int DWTELEM; | ||
41 | typedef short IDWTELEM; | ||
42 | |||
43 | void fdct_ifast (DCTELEM *data); | ||
44 | void fdct_ifast248 (DCTELEM *data); | ||
45 | void ff_jpeg_fdct_islow (DCTELEM *data); | ||
46 | void ff_fdct248_islow (DCTELEM *data); | ||
47 | |||
48 | void j_rev_dct (DCTELEM *data); | ||
49 | void j_rev_dct4 (DCTELEM *data); | ||
50 | void j_rev_dct2 (DCTELEM *data); | ||
51 | void j_rev_dct1 (DCTELEM *data); | ||
52 | void ff_wmv2_idct_c(DCTELEM *data); | ||
53 | |||
54 | void ff_fdct_mmx(DCTELEM *block); | ||
55 | void ff_fdct_mmx2(DCTELEM *block); | ||
56 | void ff_fdct_sse2(DCTELEM *block); | ||
57 | |||
58 | void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride); | ||
59 | void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride); | ||
60 | void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride); | ||
61 | void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride); | ||
62 | void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block); | ||
63 | void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block); | ||
64 | void ff_h264_idct_add16_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]); | ||
65 | void ff_h264_idct_add16intra_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]); | ||
66 | void ff_h264_idct8_add4_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]); | ||
67 | void ff_h264_idct_add8_c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]); | ||
68 | |||
69 | void ff_vector_fmul_add_add_c(float *dst, const float *src0, const float *src1, | ||
70 | const float *src2, int src3, int blocksize, int step); | ||
71 | void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, | ||
72 | const float *win, float add_bias, int len); | ||
73 | void ff_float_to_int16_c(int16_t *dst, const float *src, long len); | ||
74 | void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, int channels); | ||
75 | |||
76 | /* encoding scans */ | ||
77 | extern const uint8_t ff_alternate_horizontal_scan[64]; | ||
78 | extern const uint8_t ff_alternate_vertical_scan[64]; | ||
79 | extern const uint8_t ff_zigzag_direct[64]; | ||
80 | extern const uint8_t ff_zigzag248_direct[64]; | ||
81 | |||
82 | /* pixel operations */ | ||
83 | #define MAX_NEG_CROP 1024 | ||
84 | |||
85 | /* temporary */ | ||
86 | extern uint32_t ff_squareTbl[512]; | ||
87 | extern uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP]; | ||
88 | |||
89 | /* VP3 DSP functions */ | ||
90 | void ff_vp3_idct_c(DCTELEM *block/* align 16*/); | ||
91 | void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); | ||
92 | void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); | ||
93 | |||
94 | void ff_vp3_v_loop_filter_c(uint8_t *src, int stride, int *bounding_values); | ||
95 | void ff_vp3_h_loop_filter_c(uint8_t *src, int stride, int *bounding_values); | ||
96 | |||
97 | /* VP6 DSP functions */ | ||
98 | void ff_vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, int stride, | ||
99 | const int16_t *h_weights, const int16_t *v_weights); | ||
100 | |||
101 | /* 1/2^n downscaling functions from imgconvert.c */ | ||
102 | void ff_img_copy_plane(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); | ||
103 | void ff_shrink22(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); | ||
104 | void ff_shrink44(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); | ||
105 | void ff_shrink88(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); | ||
106 | |||
107 | void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, | ||
108 | int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height); | ||
109 | |||
110 | /* minimum alignment rules ;) | ||
111 | If you notice errors in the align stuff, need more alignment for some ASM code | ||
112 | for some CPU or need to use a function with less aligned data then send a mail | ||
113 | to the ffmpeg-devel mailing list, ... | ||
114 | |||
115 | !warning These alignments might not match reality, (missing attribute((align)) | ||
116 | stuff somewhere possible). | ||
117 | I (Michael) did not check them, these are just the alignments which I think | ||
118 | could be reached easily ... | ||
119 | |||
120 | !future video codecs might need functions with less strict alignment | ||
121 | */ | ||
122 | |||
123 | /* | ||
124 | void get_pixels_c(DCTELEM *block, const uint8_t *pixels, int line_size); | ||
125 | void diff_pixels_c(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride); | ||
126 | void put_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size); | ||
127 | void add_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size); | ||
128 | void clear_blocks_c(DCTELEM *blocks); | ||
129 | */ | ||
130 | |||
131 | /* add and put pixel (decoding) */ | ||
132 | // blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16 | ||
133 | //h for op_pixels_func is limited to {width/2, width} but never larger than 16 and never smaller then 4 | ||
134 | typedef void (*op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int h); | ||
135 | typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h); | ||
136 | typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride); | ||
137 | typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y); | ||
138 | typedef void (*h264_weight_func)(uint8_t *block, int stride, int log2_denom, int weight, int offset); | ||
139 | typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset); | ||
140 | |||
141 | #define DEF_OLD_QPEL(name)\ | ||
142 | void ff_put_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\ | ||
143 | void ff_put_no_rnd_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\ | ||
144 | void ff_avg_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride); | ||
145 | |||
146 | DEF_OLD_QPEL(qpel16_mc11_old_c) | ||
147 | DEF_OLD_QPEL(qpel16_mc31_old_c) | ||
148 | DEF_OLD_QPEL(qpel16_mc12_old_c) | ||
149 | DEF_OLD_QPEL(qpel16_mc32_old_c) | ||
150 | DEF_OLD_QPEL(qpel16_mc13_old_c) | ||
151 | DEF_OLD_QPEL(qpel16_mc33_old_c) | ||
152 | DEF_OLD_QPEL(qpel8_mc11_old_c) | ||
153 | DEF_OLD_QPEL(qpel8_mc31_old_c) | ||
154 | DEF_OLD_QPEL(qpel8_mc12_old_c) | ||
155 | DEF_OLD_QPEL(qpel8_mc32_old_c) | ||
156 | DEF_OLD_QPEL(qpel8_mc13_old_c) | ||
157 | DEF_OLD_QPEL(qpel8_mc33_old_c) | ||
158 | |||
159 | #define CALL_2X_PIXELS(a, b, n)\ | ||
160 | static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | ||
161 | b(block , pixels , line_size, h);\ | ||
162 | b(block+n, pixels+n, line_size, h);\ | ||
163 | } | ||
164 | |||
165 | /* motion estimation */ | ||
166 | // h is limited to {width/2, width, 2*width} but never larger than 16 and never smaller then 2 | ||
167 | // although currently h<4 is not used as functions with width <8 are neither used nor implemented | ||
168 | typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/; | ||
169 | |||
170 | |||
171 | // for snow slices | ||
172 | typedef struct slice_buffer_s slice_buffer; | ||
173 | |||
174 | /** | ||
175 | * Scantable. | ||
176 | */ | ||
177 | typedef struct ScanTable{ | ||
178 | const uint8_t *scantable; | ||
179 | uint8_t permutated[64]; | ||
180 | uint8_t raster_end[64]; | ||
181 | #if ARCH_PPC | ||
182 | /** Used by dct_quantize_altivec to find last-non-zero */ | ||
183 | DECLARE_ALIGNED(16, uint8_t, inverse[64]); | ||
184 | #endif | ||
185 | } ScanTable; | ||
186 | |||
187 | void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable); | ||
188 | |||
189 | void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, | ||
190 | int block_w, int block_h, | ||
191 | int src_x, int src_y, int w, int h); | ||
192 | |||
193 | /** | ||
194 | * DSPContext. | ||
195 | */ | ||
196 | typedef struct DSPContext { | ||
197 | /* pixel ops : interface with DCT */ | ||
198 | void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size); | ||
199 | void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride); | ||
200 | void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); | ||
201 | void (*put_signed_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); | ||
202 | void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); | ||
203 | void (*add_pixels8)(uint8_t *pixels, DCTELEM *block, int line_size); | ||
204 | void (*add_pixels4)(uint8_t *pixels, DCTELEM *block, int line_size); | ||
205 | int (*sum_abs_dctelem)(DCTELEM *block/*align 16*/); | ||
206 | /** | ||
207 | * translational global motion compensation. | ||
208 | */ | ||
209 | void (*gmc1)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder); | ||
210 | /** | ||
211 | * global motion compensation. | ||
212 | */ | ||
213 | void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy, | ||
214 | int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height); | ||
215 | void (*clear_block)(DCTELEM *block/*align 16*/); | ||
216 | void (*clear_blocks)(DCTELEM *blocks/*align 16*/); | ||
217 | int (*pix_sum)(uint8_t * pix, int line_size); | ||
218 | int (*pix_norm1)(uint8_t * pix, int line_size); | ||
219 | // 16x16 8x8 4x4 2x2 16x8 8x4 4x2 8x16 4x8 2x4 | ||
220 | |||
221 | me_cmp_func sad[6]; /* identical to pix_absAxA except additional void * */ | ||
222 | me_cmp_func sse[6]; | ||
223 | me_cmp_func hadamard8_diff[6]; | ||
224 | me_cmp_func dct_sad[6]; | ||
225 | me_cmp_func quant_psnr[6]; | ||
226 | me_cmp_func bit[6]; | ||
227 | me_cmp_func rd[6]; | ||
228 | me_cmp_func vsad[6]; | ||
229 | me_cmp_func vsse[6]; | ||
230 | me_cmp_func nsse[6]; | ||
231 | me_cmp_func w53[6]; | ||
232 | me_cmp_func w97[6]; | ||
233 | me_cmp_func dct_max[6]; | ||
234 | me_cmp_func dct264_sad[6]; | ||
235 | |||
236 | me_cmp_func me_pre_cmp[6]; | ||
237 | me_cmp_func me_cmp[6]; | ||
238 | me_cmp_func me_sub_cmp[6]; | ||
239 | me_cmp_func mb_cmp[6]; | ||
240 | me_cmp_func ildct_cmp[6]; //only width 16 used | ||
241 | me_cmp_func frame_skip_cmp[6]; //only width 8 used | ||
242 | |||
243 | int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2, | ||
244 | int size); | ||
245 | |||
246 | /** | ||
247 | * Halfpel motion compensation with rounding (a+b+1)>>1. | ||
248 | * this is an array[4][4] of motion compensation functions for 4 | ||
249 | * horizontal blocksizes (8,16) and the 4 halfpel positions<br> | ||
250 | * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ] | ||
251 | * @param block destination where the result is stored | ||
252 | * @param pixels source | ||
253 | * @param line_size number of bytes in a horizontal line of block | ||
254 | * @param h height | ||
255 | */ | ||
256 | op_pixels_func put_pixels_tab[4][4]; | ||
257 | |||
258 | /** | ||
259 | * Halfpel motion compensation with rounding (a+b+1)>>1. | ||
260 | * This is an array[4][4] of motion compensation functions for 4 | ||
261 | * horizontal blocksizes (8,16) and the 4 halfpel positions<br> | ||
262 | * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ] | ||
263 | * @param block destination into which the result is averaged (a+b+1)>>1 | ||
264 | * @param pixels source | ||
265 | * @param line_size number of bytes in a horizontal line of block | ||
266 | * @param h height | ||
267 | */ | ||
268 | op_pixels_func avg_pixels_tab[4][4]; | ||
269 | |||
270 | /** | ||
271 | * Halfpel motion compensation with no rounding (a+b)>>1. | ||
272 | * this is an array[2][4] of motion compensation functions for 2 | ||
273 | * horizontal blocksizes (8,16) and the 4 halfpel positions<br> | ||
274 | * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ] | ||
275 | * @param block destination where the result is stored | ||
276 | * @param pixels source | ||
277 | * @param line_size number of bytes in a horizontal line of block | ||
278 | * @param h height | ||
279 | */ | ||
280 | op_pixels_func put_no_rnd_pixels_tab[4][4]; | ||
281 | |||
282 | /** | ||
283 | * Halfpel motion compensation with no rounding (a+b)>>1. | ||
284 | * this is an array[2][4] of motion compensation functions for 2 | ||
285 | * horizontal blocksizes (8,16) and the 4 halfpel positions<br> | ||
286 | * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ] | ||
287 | * @param block destination into which the result is averaged (a+b)>>1 | ||
288 | * @param pixels source | ||
289 | * @param line_size number of bytes in a horizontal line of block | ||
290 | * @param h height | ||
291 | */ | ||
292 | op_pixels_func avg_no_rnd_pixels_tab[4][4]; | ||
293 | |||
294 | void (*put_no_rnd_pixels_l2[2])(uint8_t *block/*align width (8 or 16)*/, const uint8_t *a/*align 1*/, const uint8_t *b/*align 1*/, int line_size, int h); | ||
295 | |||
296 | /** | ||
297 | * Thirdpel motion compensation with rounding (a+b+1)>>1. | ||
298 | * this is an array[12] of motion compensation functions for the 9 thirdpe | ||
299 | * positions<br> | ||
300 | * *pixels_tab[ xthirdpel + 4*ythirdpel ] | ||
301 | * @param block destination where the result is stored | ||
302 | * @param pixels source | ||
303 | * @param line_size number of bytes in a horizontal line of block | ||
304 | * @param h height | ||
305 | */ | ||
306 | tpel_mc_func put_tpel_pixels_tab[11]; //FIXME individual func ptr per width? | ||
307 | tpel_mc_func avg_tpel_pixels_tab[11]; //FIXME individual func ptr per width? | ||
308 | |||
309 | qpel_mc_func put_qpel_pixels_tab[2][16]; | ||
310 | qpel_mc_func avg_qpel_pixels_tab[2][16]; | ||
311 | qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16]; | ||
312 | qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16]; | ||
313 | qpel_mc_func put_mspel_pixels_tab[8]; | ||
314 | |||
315 | /** | ||
316 | * h264 Chroma MC | ||
317 | */ | ||
318 | h264_chroma_mc_func put_h264_chroma_pixels_tab[3]; | ||
319 | /* This is really one func used in VC-1 decoding */ | ||
320 | h264_chroma_mc_func put_no_rnd_h264_chroma_pixels_tab[3]; | ||
321 | h264_chroma_mc_func avg_h264_chroma_pixels_tab[3]; | ||
322 | |||
323 | qpel_mc_func put_h264_qpel_pixels_tab[4][16]; | ||
324 | qpel_mc_func avg_h264_qpel_pixels_tab[4][16]; | ||
325 | |||
326 | qpel_mc_func put_2tap_qpel_pixels_tab[4][16]; | ||
327 | qpel_mc_func avg_2tap_qpel_pixels_tab[4][16]; | ||
328 | |||
329 | h264_weight_func weight_h264_pixels_tab[10]; | ||
330 | h264_biweight_func biweight_h264_pixels_tab[10]; | ||
331 | |||
332 | /* AVS specific */ | ||
333 | qpel_mc_func put_cavs_qpel_pixels_tab[2][16]; | ||
334 | qpel_mc_func avg_cavs_qpel_pixels_tab[2][16]; | ||
335 | void (*cavs_filter_lv)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2); | ||
336 | void (*cavs_filter_lh)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2); | ||
337 | void (*cavs_filter_cv)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2); | ||
338 | void (*cavs_filter_ch)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2); | ||
339 | void (*cavs_idct8_add)(uint8_t *dst, DCTELEM *block, int stride); | ||
340 | |||
341 | me_cmp_func pix_abs[2][4]; | ||
342 | |||
343 | /* huffyuv specific */ | ||
344 | void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w); | ||
345 | void (*add_bytes_l2)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 16*/, int w); | ||
346 | void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w); | ||
347 | /** | ||
348 | * subtract huffyuv's variant of median prediction | ||
349 | * note, this might read from src1[-1], src2[-1] | ||
350 | */ | ||
351 | void (*sub_hfyu_median_prediction)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top); | ||
352 | void (*add_hfyu_median_prediction)(uint8_t *dst, uint8_t *top, uint8_t *diff, int w, int *left, int *left_top); | ||
353 | /* this might write to dst[w] */ | ||
354 | void (*add_png_paeth_prediction)(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp); | ||
355 | void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w); | ||
356 | |||
357 | void (*h264_v_loop_filter_luma)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0); | ||
358 | void (*h264_h_loop_filter_luma)(uint8_t *pix/*align 4 */, int stride, int alpha, int beta, int8_t *tc0); | ||
359 | /* v/h_loop_filter_luma_intra: align 16 */ | ||
360 | void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta); | ||
361 | void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta); | ||
362 | void (*h264_v_loop_filter_chroma)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta, int8_t *tc0); | ||
363 | void (*h264_h_loop_filter_chroma)(uint8_t *pix/*align 4*/, int stride, int alpha, int beta, int8_t *tc0); | ||
364 | void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta); | ||
365 | void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta); | ||
366 | // h264_loop_filter_strength: simd only. the C version is inlined in h264.c | ||
367 | void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2], | ||
368 | int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field); | ||
369 | |||
370 | void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale); | ||
371 | void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale); | ||
372 | |||
373 | void (*h261_loop_filter)(uint8_t *src, int stride); | ||
374 | |||
375 | void (*x8_v_loop_filter)(uint8_t *src, int stride, int qscale); | ||
376 | void (*x8_h_loop_filter)(uint8_t *src, int stride, int qscale); | ||
377 | |||
378 | void (*vp3_v_loop_filter)(uint8_t *src, int stride, int *bounding_values); | ||
379 | void (*vp3_h_loop_filter)(uint8_t *src, int stride, int *bounding_values); | ||
380 | |||
381 | void (*vp6_filter_diag4)(uint8_t *dst, uint8_t *src, int stride, | ||
382 | const int16_t *h_weights,const int16_t *v_weights); | ||
383 | |||
384 | /* assume len is a multiple of 4, and arrays are 16-byte aligned */ | ||
385 | void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize); | ||
386 | void (*ac3_downmix)(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len); | ||
387 | /* no alignment needed */ | ||
388 | void (*flac_compute_autocorr)(const int32_t *data, int len, int lag, double *autoc); | ||
389 | /* assume len is a multiple of 8, and arrays are 16-byte aligned */ | ||
390 | void (*vector_fmul)(float *dst, const float *src, int len); | ||
391 | void (*vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len); | ||
392 | /* assume len is a multiple of 8, and src arrays are 16-byte aligned */ | ||
393 | void (*vector_fmul_add_add)(float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step); | ||
394 | /* assume len is a multiple of 4, and arrays are 16-byte aligned */ | ||
395 | void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len); | ||
396 | /* assume len is a multiple of 8, and arrays are 16-byte aligned */ | ||
397 | void (*int32_to_float_fmul_scalar)(float *dst, const int *src, float mul, int len); | ||
398 | |||
399 | /* C version: convert floats from the range [384.0,386.0] to ints in [-32768,32767] | ||
400 | * simd versions: convert floats from [-32768.0,32767.0] without rescaling and arrays are 16byte aligned */ | ||
401 | void (*float_to_int16)(int16_t *dst, const float *src, long len); | ||
402 | void (*float_to_int16_interleave)(int16_t *dst, const float **src, long len, int channels); | ||
403 | |||
404 | /* (I)DCT */ | ||
405 | void (*fdct)(DCTELEM *block/* align 16*/); | ||
406 | void (*fdct248)(DCTELEM *block/* align 16*/); | ||
407 | |||
408 | /* IDCT really*/ | ||
409 | void (*idct)(DCTELEM *block/* align 16*/); | ||
410 | |||
411 | /** | ||
412 | * block -> idct -> clip to unsigned 8 bit -> dest. | ||
413 | * (-1392, 0, 0, ...) -> idct -> (-174, -174, ...) -> put -> (0, 0, ...) | ||
414 | * @param line_size size in bytes of a horizontal line of dest | ||
415 | */ | ||
416 | void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); | ||
417 | |||
418 | /** | ||
419 | * block -> idct -> add dest -> clip to unsigned 8 bit -> dest. | ||
420 | * @param line_size size in bytes of a horizontal line of dest | ||
421 | */ | ||
422 | void (*idct_add)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); | ||
423 | |||
424 | /** | ||
425 | * idct input permutation. | ||
426 | * several optimized IDCTs need a permutated input (relative to the normal order of the reference | ||
427 | * IDCT) | ||
428 | * this permutation must be performed before the idct_put/add, note, normally this can be merged | ||
429 | * with the zigzag/alternate scan<br> | ||
430 | * an example to avoid confusion: | ||
431 | * - (->decode coeffs -> zigzag reorder -> dequant -> reference idct ->...) | ||
432 | * - (x -> referece dct -> reference idct -> x) | ||
433 | * - (x -> referece dct -> simple_mmx_perm = idct_permutation -> simple_idct_mmx -> x) | ||
434 | * - (->decode coeffs -> zigzag reorder -> simple_mmx_perm -> dequant -> simple_idct_mmx ->...) | ||
435 | */ | ||
436 | uint8_t idct_permutation[64]; | ||
437 | int idct_permutation_type; | ||
438 | #define FF_NO_IDCT_PERM 1 | ||
439 | #define FF_LIBMPEG2_IDCT_PERM 2 | ||
440 | #define FF_SIMPLE_IDCT_PERM 3 | ||
441 | #define FF_TRANSPOSE_IDCT_PERM 4 | ||
442 | #define FF_PARTTRANS_IDCT_PERM 5 | ||
443 | #define FF_SSE2_IDCT_PERM 6 | ||
444 | |||
445 | int (*try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale); | ||
446 | void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale); | ||
447 | #define BASIS_SHIFT 16 | ||
448 | #define RECON_SHIFT 6 | ||
449 | |||
450 | void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w); | ||
451 | #define EDGE_WIDTH 16 | ||
452 | |||
453 | /* h264 functions */ | ||
454 | /* NOTE!!! if you implement any of h264_idct8_add, h264_idct8_add4 then you must implement all of them | ||
455 | NOTE!!! if you implement any of h264_idct_add, h264_idct_add16, h264_idct_add16intra, h264_idct_add8 then you must implement all of them | ||
456 | The reason for above, is that no 2 out of one list may use a different permutation. | ||
457 | */ | ||
458 | void (*h264_idct_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride); | ||
459 | void (*h264_idct8_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride); | ||
460 | void (*h264_idct_dc_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride); | ||
461 | void (*h264_idct8_dc_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride); | ||
462 | void (*h264_dct)(DCTELEM block[4][4]); | ||
463 | void (*h264_idct_add16)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]); | ||
464 | void (*h264_idct8_add4)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]); | ||
465 | void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]); | ||
466 | void (*h264_idct_add16intra)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]); | ||
467 | |||
468 | /* snow wavelet */ | ||
469 | void (*vertical_compose97i)(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width); | ||
470 | void (*horizontal_compose97i)(IDWTELEM *b, int width); | ||
471 | void (*inner_add_yblock)(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); | ||
472 | |||
473 | void (*prefetch)(void *mem, int stride, int h); | ||
474 | |||
475 | void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); | ||
476 | |||
477 | /* vc1 functions */ | ||
478 | void (*vc1_inv_trans_8x8)(DCTELEM *b); | ||
479 | void (*vc1_inv_trans_8x4)(uint8_t *dest, int line_size, DCTELEM *block); | ||
480 | void (*vc1_inv_trans_4x8)(uint8_t *dest, int line_size, DCTELEM *block); | ||
481 | void (*vc1_inv_trans_4x4)(uint8_t *dest, int line_size, DCTELEM *block); | ||
482 | void (*vc1_v_overlap)(uint8_t* src, int stride); | ||
483 | void (*vc1_h_overlap)(uint8_t* src, int stride); | ||
484 | /* put 8x8 block with bicubic interpolation and quarterpel precision | ||
485 | * last argument is actually round value instead of height | ||
486 | */ | ||
487 | op_pixels_func put_vc1_mspel_pixels_tab[16]; | ||
488 | |||
489 | /* intrax8 functions */ | ||
490 | void (*x8_spatial_compensation[12])(uint8_t *src , uint8_t *dst, int linesize); | ||
491 | void (*x8_setup_spatial_compensation)(uint8_t *src, uint8_t *dst, int linesize, | ||
492 | int * range, int * sum, int edges); | ||
493 | |||
494 | /* ape functions */ | ||
495 | /** | ||
496 | * Add contents of the second vector to the first one. | ||
497 | * @param len length of vectors, should be multiple of 16 | ||
498 | */ | ||
499 | void (*add_int16)(int16_t *v1/*align 16*/, int16_t *v2, int len); | ||
500 | /** | ||
501 | * Add contents of the second vector to the first one. | ||
502 | * @param len length of vectors, should be multiple of 16 | ||
503 | */ | ||
504 | void (*sub_int16)(int16_t *v1/*align 16*/, int16_t *v2, int len); | ||
505 | /** | ||
506 | * Calculate scalar product of two vectors. | ||
507 | * @param len length of vectors, should be multiple of 16 | ||
508 | * @param shift number of bits to discard from product | ||
509 | */ | ||
510 | int32_t (*scalarproduct_int16)(int16_t *v1, int16_t *v2/*align 16*/, int len, int shift); | ||
511 | |||
512 | /* rv30 functions */ | ||
513 | qpel_mc_func put_rv30_tpel_pixels_tab[4][16]; | ||
514 | qpel_mc_func avg_rv30_tpel_pixels_tab[4][16]; | ||
515 | |||
516 | /* rv40 functions */ | ||
517 | qpel_mc_func put_rv40_qpel_pixels_tab[4][16]; | ||
518 | qpel_mc_func avg_rv40_qpel_pixels_tab[4][16]; | ||
519 | h264_chroma_mc_func put_rv40_chroma_pixels_tab[3]; | ||
520 | h264_chroma_mc_func avg_rv40_chroma_pixels_tab[3]; | ||
521 | } DSPContext; | ||
522 | |||
523 | void dsputil_static_init(void); | ||
524 | void dsputil_init(DSPContext* p); | ||
525 | |||
526 | int ff_check_alignment(void); | ||
527 | |||
528 | /** | ||
529 | * permute block according to permuatation. | ||
530 | * @param last last non zero element in scantable order | ||
531 | */ | ||
532 | void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last); | ||
533 | |||
534 | void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type); | ||
535 | |||
536 | #define BYTE_VEC32(c) ((c)*0x01010101UL) | ||
537 | |||
538 | static inline uint32_t rnd_avg32(uint32_t a, uint32_t b) | ||
539 | { | ||
540 | return (a | b) - (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1); | ||
541 | } | ||
542 | |||
543 | static inline uint32_t no_rnd_avg32(uint32_t a, uint32_t b) | ||
544 | { | ||
545 | return (a & b) + (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1); | ||
546 | } | ||
547 | |||
548 | static inline int get_penalty_factor(int lambda, int lambda2, int type){ | ||
549 | switch(type&0xFF){ | ||
550 | default: | ||
551 | case FF_CMP_SAD: | ||
552 | return lambda>>FF_LAMBDA_SHIFT; | ||
553 | case FF_CMP_DCT: | ||
554 | return (3*lambda)>>(FF_LAMBDA_SHIFT+1); | ||
555 | case FF_CMP_W53: | ||
556 | return (4*lambda)>>(FF_LAMBDA_SHIFT); | ||
557 | case FF_CMP_W97: | ||
558 | return (2*lambda)>>(FF_LAMBDA_SHIFT); | ||
559 | case FF_CMP_SATD: | ||
560 | case FF_CMP_DCT264: | ||
561 | return (2*lambda)>>FF_LAMBDA_SHIFT; | ||
562 | case FF_CMP_RD: | ||
563 | case FF_CMP_PSNR: | ||
564 | case FF_CMP_SSE: | ||
565 | case FF_CMP_NSSE: | ||
566 | return lambda2>>FF_LAMBDA_SHIFT; | ||
567 | case FF_CMP_BIT: | ||
568 | return 1; | ||
569 | } | ||
570 | } | ||
571 | |||
572 | /** | ||
573 | * Empty mmx state. | ||
574 | * this must be called between any dsp function and float/double code. | ||
575 | * for example sin(); dsp->idct_put(); emms_c(); cos() | ||
576 | */ | ||
577 | #define emms_c() | ||
578 | |||
579 | /* should be defined by architectures supporting | ||
580 | one or more MultiMedia extension */ | ||
581 | int mm_support(void); | ||
582 | |||
583 | void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx); | ||
584 | void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx); | ||
585 | void dsputil_init_bfin(DSPContext* c, AVCodecContext *avctx); | ||
586 | void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx); | ||
587 | void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx); | ||
588 | void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx); | ||
589 | void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx); | ||
590 | void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx); | ||
591 | void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx); | ||
592 | |||
593 | #define DECLARE_ALIGNED_16(t, v) DECLARE_ALIGNED(16, t, v) | ||
594 | |||
595 | #if HAVE_MMX | ||
596 | |||
597 | #undef emms_c | ||
598 | |||
599 | extern int mm_flags; | ||
600 | |||
601 | void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size); | ||
602 | void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size); | ||
603 | void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size); | ||
604 | |||
605 | static inline void emms(void) | ||
606 | { | ||
607 | __asm__ volatile ("emms;":::"memory"); | ||
608 | } | ||
609 | |||
610 | |||
611 | #define emms_c() \ | ||
612 | {\ | ||
613 | if (mm_flags & FF_MM_MMX)\ | ||
614 | emms();\ | ||
615 | } | ||
616 | |||
617 | void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx); | ||
618 | |||
619 | #elif ARCH_ARM | ||
620 | |||
621 | extern int mm_flags; | ||
622 | |||
623 | #if HAVE_NEON | ||
624 | # define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(16, t, v) | ||
625 | # define STRIDE_ALIGN 16 | ||
626 | #endif | ||
627 | |||
628 | #elif ARCH_PPC | ||
629 | |||
630 | extern int mm_flags; | ||
631 | |||
632 | #define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(16, t, v) | ||
633 | #define STRIDE_ALIGN 16 | ||
634 | |||
635 | #elif HAVE_MMI | ||
636 | |||
637 | #define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(16, t, v) | ||
638 | #define STRIDE_ALIGN 16 | ||
639 | |||
640 | #else | ||
641 | |||
642 | #define mm_flags 0 | ||
643 | #define mm_support() 0 | ||
644 | |||
645 | #endif | ||
646 | |||
647 | #ifndef DECLARE_ALIGNED_8 | ||
648 | # define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(8, t, v) | ||
649 | #endif | ||
650 | |||
651 | #ifndef STRIDE_ALIGN | ||
652 | # define STRIDE_ALIGN 8 | ||
653 | #endif | ||
654 | |||
655 | /* PSNR */ | ||
656 | void get_psnr(uint8_t *orig_image[3], uint8_t *coded_image[3], | ||
657 | int orig_linesize[3], int coded_linesize, | ||
658 | AVCodecContext *avctx); | ||
659 | |||
660 | /* FFT computation */ | ||
661 | |||
662 | /* NOTE: soon integer code will be added, so you must use the | ||
663 | FFTSample type */ | ||
664 | typedef float FFTSample; | ||
665 | |||
666 | struct MDCTContext; | ||
667 | |||
668 | typedef struct FFTComplex { | ||
669 | FFTSample re, im; | ||
670 | } FFTComplex; | ||
671 | |||
672 | typedef struct FFTContext { | ||
673 | int nbits; | ||
674 | int inverse; | ||
675 | uint16_t *revtab; | ||
676 | FFTComplex *exptab; | ||
677 | FFTComplex *exptab1; /* only used by SSE code */ | ||
678 | FFTComplex *tmp_buf; | ||
679 | void (*fft_permute)(struct FFTContext *s, FFTComplex *z); | ||
680 | void (*fft_calc)(struct FFTContext *s, FFTComplex *z); | ||
681 | void (*imdct_calc)(struct MDCTContext *s, FFTSample *output, const FFTSample *input); | ||
682 | void (*imdct_half)(struct MDCTContext *s, FFTSample *output, const FFTSample *input); | ||
683 | } FFTContext; | ||
684 | |||
685 | extern FFTSample* ff_cos_tabs[13]; | ||
686 | |||
687 | /** | ||
688 | * Sets up a complex FFT. | ||
689 | * @param nbits log2 of the length of the input array | ||
690 | * @param inverse if 0 perform the forward transform, if 1 perform the inverse | ||
691 | */ | ||
692 | int ff_fft_init(FFTContext *s, int nbits, int inverse); | ||
693 | void ff_fft_permute_c(FFTContext *s, FFTComplex *z); | ||
694 | void ff_fft_permute_sse(FFTContext *s, FFTComplex *z); | ||
695 | void ff_fft_calc_c(FFTContext *s, FFTComplex *z); | ||
696 | void ff_fft_calc_sse(FFTContext *s, FFTComplex *z); | ||
697 | void ff_fft_calc_3dn(FFTContext *s, FFTComplex *z); | ||
698 | void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z); | ||
699 | void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z); | ||
700 | |||
701 | /** | ||
702 | * Do the permutation needed BEFORE calling ff_fft_calc(). | ||
703 | */ | ||
704 | static inline void ff_fft_permute(FFTContext *s, FFTComplex *z) | ||
705 | { | ||
706 | s->fft_permute(s, z); | ||
707 | } | ||
708 | /** | ||
709 | * Do a complex FFT with the parameters defined in ff_fft_init(). The | ||
710 | * input data must be permuted before. No 1.0/sqrt(n) normalization is done. | ||
711 | */ | ||
712 | static inline void ff_fft_calc(FFTContext *s, FFTComplex *z) | ||
713 | { | ||
714 | s->fft_calc(s, z); | ||
715 | } | ||
716 | void ff_fft_end(FFTContext *s); | ||
717 | |||
718 | /* MDCT computation */ | ||
719 | |||
720 | typedef struct MDCTContext { | ||
721 | int n; /* size of MDCT (i.e. number of input data * 2) */ | ||
722 | int nbits; /* n = 2^nbits */ | ||
723 | /* pre/post rotation tables */ | ||
724 | FFTSample *tcos; | ||
725 | FFTSample *tsin; | ||
726 | FFTContext fft; | ||
727 | } MDCTContext; | ||
728 | |||
729 | static inline void ff_imdct_calc(MDCTContext *s, FFTSample *output, const FFTSample *input) | ||
730 | { | ||
731 | s->fft.imdct_calc(s, output, input); | ||
732 | } | ||
733 | static inline void ff_imdct_half(MDCTContext *s, FFTSample *output, const FFTSample *input) | ||
734 | { | ||
735 | s->fft.imdct_half(s, output, input); | ||
736 | } | ||
737 | |||
738 | /** | ||
739 | * Generate a Kaiser-Bessel Derived Window. | ||
740 | * @param window pointer to half window | ||
741 | * @param alpha determines window shape | ||
742 | * @param n size of half window | ||
743 | */ | ||
744 | void ff_kbd_window_init(float *window, float alpha, int n); | ||
745 | |||
746 | /** | ||
747 | * Generate a sine window. | ||
748 | * @param window pointer to half window | ||
749 | * @param n size of half window | ||
750 | */ | ||
751 | void ff_sine_window_init(float *window, int n); | ||
752 | extern float ff_sine_128 [ 128]; | ||
753 | extern float ff_sine_256 [ 256]; | ||
754 | extern float ff_sine_512 [ 512]; | ||
755 | extern float ff_sine_1024[1024]; | ||
756 | extern float ff_sine_2048[2048]; | ||
757 | extern float ff_sine_4096[4096]; | ||
758 | extern float *ff_sine_windows[6]; | ||
759 | |||
760 | int ff_mdct_init(MDCTContext *s, int nbits, int inverse); | ||
761 | void ff_imdct_calc_c(MDCTContext *s, FFTSample *output, const FFTSample *input); | ||
762 | void ff_imdct_half_c(MDCTContext *s, FFTSample *output, const FFTSample *input); | ||
763 | void ff_imdct_calc_3dn(MDCTContext *s, FFTSample *output, const FFTSample *input); | ||
764 | void ff_imdct_half_3dn(MDCTContext *s, FFTSample *output, const FFTSample *input); | ||
765 | void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *input); | ||
766 | void ff_imdct_half_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *input); | ||
767 | void ff_imdct_calc_sse(MDCTContext *s, FFTSample *output, const FFTSample *input); | ||
768 | void ff_imdct_half_sse(MDCTContext *s, FFTSample *output, const FFTSample *input); | ||
769 | void ff_mdct_calc(MDCTContext *s, FFTSample *out, const FFTSample *input); | ||
770 | void ff_mdct_end(MDCTContext *s); | ||
771 | |||
772 | /* Real Discrete Fourier Transform */ | ||
773 | |||
774 | enum RDFTransformType { | ||
775 | RDFT, | ||
776 | IRDFT, | ||
777 | RIDFT, | ||
778 | IRIDFT, | ||
779 | }; | ||
780 | |||
781 | typedef struct { | ||
782 | int nbits; | ||
783 | int inverse; | ||
784 | int sign_convention; | ||
785 | |||
786 | /* pre/post rotation tables */ | ||
787 | FFTSample *tcos; | ||
788 | FFTSample *tsin; | ||
789 | FFTContext fft; | ||
790 | } RDFTContext; | ||
791 | |||
792 | /** | ||
793 | * Sets up a real FFT. | ||
794 | * @param nbits log2 of the length of the input array | ||
795 | * @param trans the type of transform | ||
796 | */ | ||
797 | int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans); | ||
798 | void ff_rdft_calc(RDFTContext *s, FFTSample *data); | ||
799 | void ff_rdft_end(RDFTContext *s); | ||
800 | |||
801 | #define WRAPPER8_16(name8, name16)\ | ||
802 | static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\ | ||
803 | return name8(s, dst , src , stride, h)\ | ||
804 | +name8(s, dst+8 , src+8 , stride, h);\ | ||
805 | } | ||
806 | |||
807 | #define WRAPPER8_16_SQ(name8, name16)\ | ||
808 | static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\ | ||
809 | int score=0;\ | ||
810 | score +=name8(s, dst , src , stride, 8);\ | ||
811 | score +=name8(s, dst+8 , src+8 , stride, 8);\ | ||
812 | if(h==16){\ | ||
813 | dst += 8*stride;\ | ||
814 | src += 8*stride;\ | ||
815 | score +=name8(s, dst , src , stride, 8);\ | ||
816 | score +=name8(s, dst+8 , src+8 , stride, 8);\ | ||
817 | }\ | ||
818 | return score;\ | ||
819 | } | ||
820 | |||
821 | |||
822 | static inline void copy_block2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h) | ||
823 | { | ||
824 | int i; | ||
825 | for(i=0; i<h; i++) | ||
826 | { | ||
827 | AV_WN16(dst , AV_RN16(src )); | ||
828 | dst+=dstStride; | ||
829 | src+=srcStride; | ||
830 | } | ||
831 | } | ||
832 | |||
833 | static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h) | ||
834 | { | ||
835 | int i; | ||
836 | for(i=0; i<h; i++) | ||
837 | { | ||
838 | AV_WN32(dst , AV_RN32(src )); | ||
839 | dst+=dstStride; | ||
840 | src+=srcStride; | ||
841 | } | ||
842 | } | ||
843 | |||
844 | static inline void copy_block8(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h) | ||
845 | { | ||
846 | int i; | ||
847 | for(i=0; i<h; i++) | ||
848 | { | ||
849 | AV_WN32(dst , AV_RN32(src )); | ||
850 | AV_WN32(dst+4 , AV_RN32(src+4 )); | ||
851 | dst+=dstStride; | ||
852 | src+=srcStride; | ||
853 | } | ||
854 | } | ||
855 | |||
856 | static inline void copy_block9(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h) | ||
857 | { | ||
858 | int i; | ||
859 | for(i=0; i<h; i++) | ||
860 | { | ||
861 | AV_WN32(dst , AV_RN32(src )); | ||
862 | AV_WN32(dst+4 , AV_RN32(src+4 )); | ||
863 | dst[8]= src[8]; | ||
864 | dst+=dstStride; | ||
865 | src+=srcStride; | ||
866 | } | ||
867 | } | ||
868 | |||
869 | static inline void copy_block16(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h) | ||
870 | { | ||
871 | int i; | ||
872 | for(i=0; i<h; i++) | ||
873 | { | ||
874 | AV_WN32(dst , AV_RN32(src )); | ||
875 | AV_WN32(dst+4 , AV_RN32(src+4 )); | ||
876 | AV_WN32(dst+8 , AV_RN32(src+8 )); | ||
877 | AV_WN32(dst+12, AV_RN32(src+12)); | ||
878 | dst+=dstStride; | ||
879 | src+=srcStride; | ||
880 | } | ||
881 | } | ||
882 | |||
883 | static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h) | ||
884 | { | ||
885 | int i; | ||
886 | for(i=0; i<h; i++) | ||
887 | { | ||
888 | AV_WN32(dst , AV_RN32(src )); | ||
889 | AV_WN32(dst+4 , AV_RN32(src+4 )); | ||
890 | AV_WN32(dst+8 , AV_RN32(src+8 )); | ||
891 | AV_WN32(dst+12, AV_RN32(src+12)); | ||
892 | dst[16]= src[16]; | ||
893 | dst+=dstStride; | ||
894 | src+=srcStride; | ||
895 | } | ||
896 | } | ||
897 | |||
898 | #endif /* AVCODEC_DSPUTIL_H */ | ||
diff --git a/apps/codecs/libatrac/fft.c b/apps/codecs/libatrac/fft.c deleted file mode 100644 index a3f1151472..0000000000 --- a/apps/codecs/libatrac/fft.c +++ /dev/null | |||
@@ -1,374 +0,0 @@ | |||
1 | /* | ||
2 | * FFT/IFFT transforms | ||
3 | * Copyright (c) 2008 Loren Merritt | ||
4 | * Copyright (c) 2002 Fabrice Bellard | ||
5 | * Partly based on libdjbfft by D. J. Bernstein | ||
6 | * | ||
7 | * This file is part of FFmpeg. | ||
8 | * | ||
9 | * FFmpeg is free software; you can redistribute it and/or | ||
10 | * modify it under the terms of the GNU Lesser General Public | ||
11 | * License as published by the Free Software Foundation; either | ||
12 | * version 2.1 of the License, or (at your option) any later version. | ||
13 | * | ||
14 | * FFmpeg is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
17 | * Lesser General Public License for more details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU Lesser General Public | ||
20 | * License along with FFmpeg; if not, write to the Free Software | ||
21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
22 | */ | ||
23 | |||
24 | /** | ||
25 | * @file libavcodec/fft.c | ||
26 | * FFT/IFFT transforms. | ||
27 | */ | ||
28 | |||
29 | #include "dsputil.h" | ||
30 | |||
31 | /* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */ | ||
32 | DECLARE_ALIGNED_16(FFTSample, ff_cos_16[8]); | ||
33 | DECLARE_ALIGNED_16(FFTSample, ff_cos_32[16]); | ||
34 | DECLARE_ALIGNED_16(FFTSample, ff_cos_64[32]); | ||
35 | DECLARE_ALIGNED_16(FFTSample, ff_cos_128[64]); | ||
36 | DECLARE_ALIGNED_16(FFTSample, ff_cos_256[128]); | ||
37 | DECLARE_ALIGNED_16(FFTSample, ff_cos_512[256]); | ||
38 | DECLARE_ALIGNED_16(FFTSample, ff_cos_1024[512]); | ||
39 | DECLARE_ALIGNED_16(FFTSample, ff_cos_2048[1024]); | ||
40 | DECLARE_ALIGNED_16(FFTSample, ff_cos_4096[2048]); | ||
41 | DECLARE_ALIGNED_16(FFTSample, ff_cos_8192[4096]); | ||
42 | DECLARE_ALIGNED_16(FFTSample, ff_cos_16384[8192]); | ||
43 | DECLARE_ALIGNED_16(FFTSample, ff_cos_32768[16384]); | ||
44 | DECLARE_ALIGNED_16(FFTSample, ff_cos_65536[32768]); | ||
45 | FFTSample *ff_cos_tabs[] = { | ||
46 | ff_cos_16, ff_cos_32, ff_cos_64, ff_cos_128, ff_cos_256, ff_cos_512, ff_cos_1024, | ||
47 | ff_cos_2048, ff_cos_4096, ff_cos_8192, ff_cos_16384, ff_cos_32768, ff_cos_65536, | ||
48 | }; | ||
49 | |||
50 | static int split_radix_permutation(int i, int n, int inverse) | ||
51 | { | ||
52 | int m; | ||
53 | if(n <= 2) return i&1; | ||
54 | m = n >> 1; | ||
55 | if(!(i&m)) return split_radix_permutation(i, m, inverse)*2; | ||
56 | m >>= 1; | ||
57 | if(inverse == !(i&m)) return split_radix_permutation(i, m, inverse)*4 + 1; | ||
58 | else return split_radix_permutation(i, m, inverse)*4 - 1; | ||
59 | } | ||
60 | |||
61 | av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse) | ||
62 | { | ||
63 | int i, j, m, n; | ||
64 | float alpha, c1, s1, s2; | ||
65 | int split_radix = 1; | ||
66 | int av_unused has_vectors; | ||
67 | |||
68 | if (nbits < 2 || nbits > 16) | ||
69 | goto fail; | ||
70 | s->nbits = nbits; | ||
71 | n = 1 << nbits; | ||
72 | |||
73 | s->tmp_buf = NULL; | ||
74 | s->exptab = av_malloc((n / 2) * sizeof(FFTComplex)); | ||
75 | if (!s->exptab) | ||
76 | goto fail; | ||
77 | s->revtab = av_malloc(n * sizeof(uint16_t)); | ||
78 | if (!s->revtab) | ||
79 | goto fail; | ||
80 | s->inverse = inverse; | ||
81 | |||
82 | s2 = inverse ? 1.0 : -1.0; | ||
83 | |||
84 | s->fft_permute = ff_fft_permute_c; | ||
85 | s->fft_calc = ff_fft_calc_c; | ||
86 | s->imdct_calc = ff_imdct_calc_c; | ||
87 | s->imdct_half = ff_imdct_half_c; | ||
88 | s->exptab1 = NULL; | ||
89 | |||
90 | #if HAVE_MMX && HAVE_YASM | ||
91 | has_vectors = mm_support(); | ||
92 | if (has_vectors & FF_MM_SSE && HAVE_SSE) { | ||
93 | /* SSE for P3/P4/K8 */ | ||
94 | s->imdct_calc = ff_imdct_calc_sse; | ||
95 | s->imdct_half = ff_imdct_half_sse; | ||
96 | s->fft_permute = ff_fft_permute_sse; | ||
97 | s->fft_calc = ff_fft_calc_sse; | ||
98 | } else if (has_vectors & FF_MM_3DNOWEXT && HAVE_AMD3DNOWEXT) { | ||
99 | /* 3DNowEx for K7 */ | ||
100 | s->imdct_calc = ff_imdct_calc_3dn2; | ||
101 | s->imdct_half = ff_imdct_half_3dn2; | ||
102 | s->fft_calc = ff_fft_calc_3dn2; | ||
103 | } else if (has_vectors & FF_MM_3DNOW && HAVE_AMD3DNOW) { | ||
104 | /* 3DNow! for K6-2/3 */ | ||
105 | s->imdct_calc = ff_imdct_calc_3dn; | ||
106 | s->imdct_half = ff_imdct_half_3dn; | ||
107 | s->fft_calc = ff_fft_calc_3dn; | ||
108 | } | ||
109 | #elif HAVE_ALTIVEC | ||
110 | has_vectors = mm_support(); | ||
111 | if (has_vectors & FF_MM_ALTIVEC) { | ||
112 | s->fft_calc = ff_fft_calc_altivec; | ||
113 | split_radix = 0; | ||
114 | } | ||
115 | #endif | ||
116 | |||
117 | if (split_radix) { | ||
118 | for(j=4; j<=nbits; j++) { | ||
119 | int m = 1<<j; | ||
120 | double freq = 2*M_PI/m; | ||
121 | FFTSample *tab = ff_cos_tabs[j-4]; | ||
122 | for(i=0; i<=m/4; i++) | ||
123 | tab[i] = cos(i*freq); | ||
124 | for(i=1; i<m/4; i++) | ||
125 | tab[m/2-i] = tab[i]; | ||
126 | } | ||
127 | for(i=0; i<n; i++) | ||
128 | s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = i; | ||
129 | s->tmp_buf = av_malloc(n * sizeof(FFTComplex)); | ||
130 | } else { | ||
131 | int np, nblocks, np2, l; | ||
132 | FFTComplex *q; | ||
133 | |||
134 | for(i=0; i<(n/2); i++) { | ||
135 | alpha = 2 * M_PI * (float)i / (float)n; | ||
136 | c1 = cos(alpha); | ||
137 | s1 = sin(alpha) * s2; | ||
138 | s->exptab[i].re = c1; | ||
139 | s->exptab[i].im = s1; | ||
140 | } | ||
141 | |||
142 | np = 1 << nbits; | ||
143 | nblocks = np >> 3; | ||
144 | np2 = np >> 1; | ||
145 | s->exptab1 = av_malloc(np * 2 * sizeof(FFTComplex)); | ||
146 | if (!s->exptab1) | ||
147 | goto fail; | ||
148 | q = s->exptab1; | ||
149 | do { | ||
150 | for(l = 0; l < np2; l += 2 * nblocks) { | ||
151 | *q++ = s->exptab[l]; | ||
152 | *q++ = s->exptab[l + nblocks]; | ||
153 | |||
154 | q->re = -s->exptab[l].im; | ||
155 | q->im = s->exptab[l].re; | ||
156 | q++; | ||
157 | q->re = -s->exptab[l + nblocks].im; | ||
158 | q->im = s->exptab[l + nblocks].re; | ||
159 | q++; | ||
160 | } | ||
161 | nblocks = nblocks >> 1; | ||
162 | } while (nblocks != 0); | ||
163 | av_freep(&s->exptab); | ||
164 | |||
165 | /* compute bit reverse table */ | ||
166 | for(i=0;i<n;i++) { | ||
167 | m=0; | ||
168 | for(j=0;j<nbits;j++) { | ||
169 | m |= ((i >> j) & 1) << (nbits-j-1); | ||
170 | } | ||
171 | s->revtab[i]=m; | ||
172 | } | ||
173 | } | ||
174 | |||
175 | return 0; | ||
176 | fail: | ||
177 | av_freep(&s->revtab); | ||
178 | av_freep(&s->exptab); | ||
179 | av_freep(&s->exptab1); | ||
180 | av_freep(&s->tmp_buf); | ||
181 | return -1; | ||
182 | } | ||
183 | |||
184 | void ff_fft_permute_c(FFTContext *s, FFTComplex *z) | ||
185 | { | ||
186 | int j, k, np; | ||
187 | FFTComplex tmp; | ||
188 | const uint16_t *revtab = s->revtab; | ||
189 | np = 1 << s->nbits; | ||
190 | |||
191 | if (s->tmp_buf) { | ||
192 | /* TODO: handle split-radix permute in a more optimal way, probably in-place */ | ||
193 | for(j=0;j<np;j++) s->tmp_buf[revtab[j]] = z[j]; | ||
194 | memcpy(z, s->tmp_buf, np * sizeof(FFTComplex)); | ||
195 | return; | ||
196 | } | ||
197 | |||
198 | /* reverse */ | ||
199 | for(j=0;j<np;j++) { | ||
200 | k = revtab[j]; | ||
201 | if (k < j) { | ||
202 | tmp = z[k]; | ||
203 | z[k] = z[j]; | ||
204 | z[j] = tmp; | ||
205 | } | ||
206 | } | ||
207 | } | ||
208 | |||
209 | av_cold void ff_fft_end(FFTContext *s) | ||
210 | { | ||
211 | av_freep(&s->revtab); | ||
212 | av_freep(&s->exptab); | ||
213 | av_freep(&s->exptab1); | ||
214 | av_freep(&s->tmp_buf); | ||
215 | } | ||
216 | |||
217 | #define sqrthalf (float)M_SQRT1_2 | ||
218 | |||
219 | #define BF(x,y,a,b) {\ | ||
220 | x = a - b;\ | ||
221 | y = a + b;\ | ||
222 | } | ||
223 | |||
224 | #define BUTTERFLIES(a0,a1,a2,a3) {\ | ||
225 | BF(t3, t5, t5, t1);\ | ||
226 | BF(a2.re, a0.re, a0.re, t5);\ | ||
227 | BF(a3.im, a1.im, a1.im, t3);\ | ||
228 | BF(t4, t6, t2, t6);\ | ||
229 | BF(a3.re, a1.re, a1.re, t4);\ | ||
230 | BF(a2.im, a0.im, a0.im, t6);\ | ||
231 | } | ||
232 | |||
233 | // force loading all the inputs before storing any. | ||
234 | // this is slightly slower for small data, but avoids store->load aliasing | ||
235 | // for addresses separated by large powers of 2. | ||
236 | #define BUTTERFLIES_BIG(a0,a1,a2,a3) {\ | ||
237 | FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\ | ||
238 | BF(t3, t5, t5, t1);\ | ||
239 | BF(a2.re, a0.re, r0, t5);\ | ||
240 | BF(a3.im, a1.im, i1, t3);\ | ||
241 | BF(t4, t6, t2, t6);\ | ||
242 | BF(a3.re, a1.re, r1, t4);\ | ||
243 | BF(a2.im, a0.im, i0, t6);\ | ||
244 | } | ||
245 | |||
246 | #define TRANSFORM(a0,a1,a2,a3,wre,wim) {\ | ||
247 | t1 = a2.re * wre + a2.im * wim;\ | ||
248 | t2 = a2.im * wre - a2.re * wim;\ | ||
249 | t5 = a3.re * wre - a3.im * wim;\ | ||
250 | t6 = a3.im * wre + a3.re * wim;\ | ||
251 | BUTTERFLIES(a0,a1,a2,a3)\ | ||
252 | } | ||
253 | |||
254 | #define TRANSFORM_ZERO(a0,a1,a2,a3) {\ | ||
255 | t1 = a2.re;\ | ||
256 | t2 = a2.im;\ | ||
257 | t5 = a3.re;\ | ||
258 | t6 = a3.im;\ | ||
259 | BUTTERFLIES(a0,a1,a2,a3)\ | ||
260 | } | ||
261 | |||
262 | /* z[0...8n-1], w[1...2n-1] */ | ||
263 | #define PASS(name)\ | ||
264 | static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\ | ||
265 | {\ | ||
266 | FFTSample t1, t2, t3, t4, t5, t6;\ | ||
267 | int o1 = 2*n;\ | ||
268 | int o2 = 4*n;\ | ||
269 | int o3 = 6*n;\ | ||
270 | const FFTSample *wim = wre+o1;\ | ||
271 | n--;\ | ||
272 | \ | ||
273 | TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\ | ||
274 | TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\ | ||
275 | do {\ | ||
276 | z += 2;\ | ||
277 | wre += 2;\ | ||
278 | wim -= 2;\ | ||
279 | TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\ | ||
280 | TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\ | ||
281 | } while(--n);\ | ||
282 | } | ||
283 | |||
284 | PASS(pass) | ||
285 | #undef BUTTERFLIES | ||
286 | #define BUTTERFLIES BUTTERFLIES_BIG | ||
287 | PASS(pass_big) | ||
288 | |||
289 | #define DECL_FFT(n,n2,n4)\ | ||
290 | static void fft##n(FFTComplex *z)\ | ||
291 | {\ | ||
292 | fft##n2(z);\ | ||
293 | fft##n4(z+n4*2);\ | ||
294 | fft##n4(z+n4*3);\ | ||
295 | pass(z,ff_cos_##n,n4/2);\ | ||
296 | } | ||
297 | |||
298 | static void fft4(FFTComplex *z) | ||
299 | { | ||
300 | FFTSample t1, t2, t3, t4, t5, t6, t7, t8; | ||
301 | |||
302 | BF(t3, t1, z[0].re, z[1].re); | ||
303 | BF(t8, t6, z[3].re, z[2].re); | ||
304 | BF(z[2].re, z[0].re, t1, t6); | ||
305 | BF(t4, t2, z[0].im, z[1].im); | ||
306 | BF(t7, t5, z[2].im, z[3].im); | ||
307 | BF(z[3].im, z[1].im, t4, t8); | ||
308 | BF(z[3].re, z[1].re, t3, t7); | ||
309 | BF(z[2].im, z[0].im, t2, t5); | ||
310 | } | ||
311 | |||
312 | static void fft8(FFTComplex *z) | ||
313 | { | ||
314 | FFTSample t1, t2, t3, t4, t5, t6, t7, t8; | ||
315 | |||
316 | fft4(z); | ||
317 | |||
318 | BF(t1, z[5].re, z[4].re, -z[5].re); | ||
319 | BF(t2, z[5].im, z[4].im, -z[5].im); | ||
320 | BF(t3, z[7].re, z[6].re, -z[7].re); | ||
321 | BF(t4, z[7].im, z[6].im, -z[7].im); | ||
322 | BF(t8, t1, t3, t1); | ||
323 | BF(t7, t2, t2, t4); | ||
324 | BF(z[4].re, z[0].re, z[0].re, t1); | ||
325 | BF(z[4].im, z[0].im, z[0].im, t2); | ||
326 | BF(z[6].re, z[2].re, z[2].re, t7); | ||
327 | BF(z[6].im, z[2].im, z[2].im, t8); | ||
328 | |||
329 | TRANSFORM(z[1],z[3],z[5],z[7],sqrthalf,sqrthalf); | ||
330 | } | ||
331 | |||
332 | #if !CONFIG_SMALL | ||
333 | static void fft16(FFTComplex *z) | ||
334 | { | ||
335 | FFTSample t1, t2, t3, t4, t5, t6; | ||
336 | |||
337 | fft8(z); | ||
338 | fft4(z+8); | ||
339 | fft4(z+12); | ||
340 | |||
341 | TRANSFORM_ZERO(z[0],z[4],z[8],z[12]); | ||
342 | TRANSFORM(z[2],z[6],z[10],z[14],sqrthalf,sqrthalf); | ||
343 | TRANSFORM(z[1],z[5],z[9],z[13],ff_cos_16[1],ff_cos_16[3]); | ||
344 | TRANSFORM(z[3],z[7],z[11],z[15],ff_cos_16[3],ff_cos_16[1]); | ||
345 | } | ||
346 | #else | ||
347 | DECL_FFT(16,8,4) | ||
348 | #endif | ||
349 | DECL_FFT(32,16,8) | ||
350 | DECL_FFT(64,32,16) | ||
351 | DECL_FFT(128,64,32) | ||
352 | DECL_FFT(256,128,64) | ||
353 | DECL_FFT(512,256,128) | ||
354 | #if !CONFIG_SMALL | ||
355 | #define pass pass_big | ||
356 | #endif | ||
357 | DECL_FFT(1024,512,256) | ||
358 | DECL_FFT(2048,1024,512) | ||
359 | DECL_FFT(4096,2048,1024) | ||
360 | DECL_FFT(8192,4096,2048) | ||
361 | DECL_FFT(16384,8192,4096) | ||
362 | DECL_FFT(32768,16384,8192) | ||
363 | DECL_FFT(65536,32768,16384) | ||
364 | |||
365 | static void (*fft_dispatch[])(FFTComplex*) = { | ||
366 | fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024, | ||
367 | fft2048, fft4096, fft8192, fft16384, fft32768, fft65536, | ||
368 | }; | ||
369 | |||
370 | void ff_fft_calc_c(FFTContext *s, FFTComplex *z) | ||
371 | { | ||
372 | fft_dispatch[s->nbits-2](z); | ||
373 | } | ||
374 | |||
diff --git a/apps/codecs/libatrac/fixp_math.c b/apps/codecs/libatrac/fixp_math.c new file mode 100644 index 0000000000..3f578a1ade --- /dev/null +++ b/apps/codecs/libatrac/fixp_math.c | |||
@@ -0,0 +1,66 @@ | |||
1 | #include "fixp_math.h" | ||
2 | |||
3 | inline int32_t fixmul31(int32_t x, int32_t y) | ||
4 | { | ||
5 | int64_t temp; | ||
6 | temp = x; | ||
7 | temp *= y; | ||
8 | |||
9 | temp >>= 31; //16+31-16 = 31 bits | ||
10 | |||
11 | return (int32_t)temp; | ||
12 | } | ||
13 | |||
14 | /* | ||
15 | * Fast integer square root adapted from algorithm, | ||
16 | * Martin Guy @ UKC, June 1985. | ||
17 | * Originally from a book on programming abaci by Mr C. Woo. | ||
18 | * This is taken from : | ||
19 | * http://wiki.forum.nokia.com/index.php/How_to_use_fixed_point_maths#How_to_get_square_root_for_integers | ||
20 | * with a added shift up of the result by 8 bits to return result in 16.16 fixed-point representation. | ||
21 | */ | ||
22 | inline int32_t fastSqrt(int32_t n) | ||
23 | { | ||
24 | /* | ||
25 | * Logically, these are unsigned. | ||
26 | * We need the sign bit to test | ||
27 | * whether (op - res - one) underflowed. | ||
28 | */ | ||
29 | int32_t op, res, one; | ||
30 | op = n; | ||
31 | res = 0; | ||
32 | /* "one" starts at the highest power of four <= than the argument. */ | ||
33 | one = 1 << 30; /* second-to-top bit set */ | ||
34 | while (one > op) one >>= 2; | ||
35 | while (one != 0) | ||
36 | { | ||
37 | if (op >= res + one) | ||
38 | { | ||
39 | op = op - (res + one); | ||
40 | res = res + (one<<1); | ||
41 | } | ||
42 | res >>= 1; | ||
43 | one >>= 2; | ||
44 | } | ||
45 | return(res << 8); | ||
46 | } | ||
47 | |||
48 | inline int32_t fixmul16(int32_t x, int32_t y) | ||
49 | { | ||
50 | int64_t temp; | ||
51 | temp = x; | ||
52 | temp *= y; | ||
53 | |||
54 | temp >>= 16; | ||
55 | |||
56 | return (int32_t)temp; | ||
57 | } | ||
58 | |||
59 | inline int32_t fixdiv16(int32_t x, int32_t y) | ||
60 | { | ||
61 | int64_t temp; | ||
62 | temp = x << 16; | ||
63 | temp /= y; | ||
64 | |||
65 | return (int32_t)temp; | ||
66 | } | ||
diff --git a/apps/codecs/libatrac/fixp_math.h b/apps/codecs/libatrac/fixp_math.h new file mode 100644 index 0000000000..5bfc2c5703 --- /dev/null +++ b/apps/codecs/libatrac/fixp_math.h | |||
@@ -0,0 +1,14 @@ | |||
1 | #include <stdlib.h> | ||
2 | |||
3 | /* Macros for converting between various fixed-point representations and floating point. */ | ||
4 | #define ONE_16 (1L << 16) | ||
5 | #define fixtof64(x) (float)((float)(x) / (float)(1 << 16)) //does not work on int64_t! | ||
6 | #define ftofix32(x) ((int32_t)((x) * (float)(1 << 16) + ((x) < 0 ? -0.5 : 0.5))) | ||
7 | #define ftofix31(x) ((int32_t)((x) * (float)(1 << 31) + ((x) < 0 ? -0.5 : 0.5))) | ||
8 | #define fix31tof64(x) (float)((float)(x) / (float)(1 << 31)) | ||
9 | |||
10 | /* Fixed point math routines for use in atrac3.c */ | ||
11 | inline int32_t fixdiv16(int32_t x, int32_t y); | ||
12 | inline int32_t fixmul16(int32_t x, int32_t y); | ||
13 | inline int32_t fixmul31(int32_t x, int32_t y); | ||
14 | inline int32_t fastSqrt(int32_t n); | ||
diff --git a/apps/codecs/libatrac/mdct.c b/apps/codecs/libatrac/mdct.c deleted file mode 100644 index 670b6d381e..0000000000 --- a/apps/codecs/libatrac/mdct.c +++ /dev/null | |||
@@ -1,245 +0,0 @@ | |||
1 | /* | ||
2 | * MDCT/IMDCT transforms | ||
3 | * Copyright (c) 2002 Fabrice Bellard | ||
4 | * | ||
5 | * This file is part of FFmpeg. | ||
6 | * | ||
7 | * FFmpeg is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU Lesser General Public | ||
9 | * License as published by the Free Software Foundation; either | ||
10 | * version 2.1 of the License, or (at your option) any later version. | ||
11 | * | ||
12 | * FFmpeg is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
15 | * Lesser General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU Lesser General Public | ||
18 | * License along with FFmpeg; if not, write to the Free Software | ||
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
20 | */ | ||
21 | #include "dsputil.h" | ||
22 | |||
23 | #ifndef M_E | ||
24 | #define M_E 2.7182818284590452354 /* e */ | ||
25 | #endif | ||
26 | #ifndef M_LN2 | ||
27 | #define M_LN2 0.69314718055994530942 /* log_e 2 */ | ||
28 | #endif | ||
29 | #ifndef M_LN10 | ||
30 | #define M_LN10 2.30258509299404568402 /* log_e 10 */ | ||
31 | #endif | ||
32 | #ifndef M_PI | ||
33 | #define M_PI 3.14159265358979323846 /* pi */ | ||
34 | #endif | ||
35 | #ifndef M_SQRT1_2 | ||
36 | #define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */ | ||
37 | #endif | ||
38 | |||
39 | /** | ||
40 | * @file libavcodec/mdct.c | ||
41 | * MDCT/IMDCT transforms. | ||
42 | */ | ||
43 | |||
// Generate a Kaiser-Bessel Derived Window.
//
// window: output array of n floats (the first half of the KBD window).
// alpha:  kernel shaping parameter of the underlying Kaiser window.
// n:      window half-length.
//
// NOTE(review): `double local_window[n]` is a VLA — for large n this puts
// several KB on the stack; confirm callers only use modest sizes.
#define BESSEL_I0_ITER 50 // default: 50 iterations of Bessel I0 approximation
av_cold void ff_kbd_window_init(float *window, float alpha, int n)
{
   int i, j;
   double sum = 0.0, bessel, tmp;
   double local_window[n];
   double alpha2 = (alpha * M_PI / n) * (alpha * M_PI / n);

   for (i = 0; i < n; i++) {
       tmp = i * (n - i) * alpha2;
       /* Evaluate the zeroth-order modified Bessel function I0(sqrt(tmp))
        * via a Horner-style truncated power series (BESSEL_I0_ITER terms). */
       bessel = 1.0;
       for (j = BESSEL_I0_ITER; j > 0; j--)
           bessel = bessel * tmp / (j * j) + 1;
       /* Accumulate the running (cumulative) sum, as the KBD definition
        * requires, and stash it per sample. */
       sum += bessel;
       local_window[i] = sum;
   }

   /* Normalize: the denominator is the full sum plus the final I0 term. */
   sum++;
   for (i = 0; i < n; i++)
       window[i] = sqrt(local_window[i] / sum);
}
66 | |||
/* Statically allocated, 16-byte-aligned sine-window tables, one per
 * transform size from 128 to 4096 samples (presumably filled via
 * ff_sine_window_init(); callers are not visible in this file). */
DECLARE_ALIGNED(16, float, ff_sine_128 [ 128]);
DECLARE_ALIGNED(16, float, ff_sine_256 [ 256]);
DECLARE_ALIGNED(16, float, ff_sine_512 [ 512]);
DECLARE_ALIGNED(16, float, ff_sine_1024[1024]);
DECLARE_ALIGNED(16, float, ff_sine_2048[2048]);
DECLARE_ALIGNED(16, float, ff_sine_4096[4096]);
/* Lookup table: entry i is the window of size 128 << i. */
float *ff_sine_windows[6] = {
    ff_sine_128, ff_sine_256, ff_sine_512, ff_sine_1024, ff_sine_2048, ff_sine_4096
};
76 | |||
77 | // Generate a sine window. | ||
78 | av_cold void ff_sine_window_init(float *window, int n) { | ||
79 | int i; | ||
80 | for(i = 0; i < n; i++) | ||
81 | window[i] = sinf((i + 0.5) * (M_PI / (2.0 * n))); | ||
82 | } | ||
83 | |||
/**
 * init MDCT or IMDCT computation.
 *
 * @param s       context to initialize (fully cleared first)
 * @param nbits   log2 of the transform size N
 * @param inverse non-zero to set up the embedded FFT for the inverse path
 * @return 0 on success, -1 on allocation/FFT-init failure (all partially
 *         allocated resources are released via the fail path)
 */
av_cold int ff_mdct_init(MDCTContext *s, int nbits, int inverse)
{
    int n, n4, i;
    double alpha;

    memset(s, 0, sizeof(*s));
    n = 1 << nbits;
    s->nbits = nbits;
    s->n = n;
    /* Only N/4 twiddle factors are needed by the half-size FFT scheme. */
    n4 = n >> 2;
    s->tcos = av_malloc(n4 * sizeof(FFTSample));
    if (!s->tcos)
        goto fail;
    s->tsin = av_malloc(n4 * sizeof(FFTSample));
    if (!s->tsin)
        goto fail;

    /* Precompute negated twiddles; the 1/8 sample offset is the standard
     * MDCT phase shift. */
    for(i=0;i<n4;i++) {
        alpha = 2 * M_PI * (i + 1.0 / 8.0) / n;
        s->tcos[i] = -cos(alpha);
        s->tsin[i] = -sin(alpha);
    }
    /* The MDCT of size N is computed via an FFT of size N/4. */
    if (ff_fft_init(&s->fft, s->nbits - 2, inverse) < 0)
        goto fail;
    return 0;
fail:
    /* av_freep NULLs the pointers, so a later ff_mdct_end stays safe. */
    av_freep(&s->tcos);
    av_freep(&s->tsin);
    return -1;
}
117 | |||
/* complex multiplication: p = a * b
 *
 * All operands are copied into temporaries first, so the destination
 * lvalues (pre, pim) may alias any of the source expressions without
 * corrupting the result. */
#define CMUL(pre, pim, are, aim, bre, bim) \
{\
    FFTSample _are = (are);\
    FFTSample _aim = (aim);\
    FFTSample _bre = (bre);\
    FFTSample _bim = (bim);\
    (pre) = _are * _bre - _aim * _bim;\
    (pim) = _are * _bim + _aim * _bre;\
}
128 | |||
/**
 * Compute the middle half of the inverse MDCT of size N = 2^nbits,
 * thus excluding the parts that can be derived by symmetry
 * @param output N/2 samples
 * @param input N/2 samples
 */
void ff_imdct_half_c(MDCTContext *s, FFTSample *output, const FFTSample *input)
{
    int k, n8, n4, n2, n, j;
    const uint16_t *revtab = s->fft.revtab;
    const FFTSample *tcos = s->tcos;
    const FFTSample *tsin = s->tsin;
    const FFTSample *in1, *in2;
    /* The output buffer doubles as the in-place FFT work area. */
    FFTComplex *z = (FFTComplex *)output;

    n = 1 << s->nbits;
    n2 = n >> 1;
    n4 = n >> 2;
    n8 = n >> 3;

    /* pre rotation: pair input[2k] with input[N/2-1-2k], twiddle, and
     * store in bit-reversed order so the FFT can run in-place. */
    in1 = input;
    in2 = input + n2 - 1;
    for(k = 0; k < n4; k++) {
        j=revtab[k];
        CMUL(z[j].re, z[j].im, *in2, *in1, tcos[k], tsin[k]);
        in1 += 2;
        in2 -= 2;
    }
    ff_fft_calc(&s->fft, z);

    /* post rotation + reordering: twiddle symmetric pairs around the
     * midpoint n8 and swap their real/imaginary placement. */
    /* NOTE(review): `output` is not read after this increment — the
     * statement appears vestigial; confirm before removing. */
    output += n4;
    for(k = 0; k < n8; k++) {
        FFTSample r0, i0, r1, i1;
        CMUL(r0, i1, z[n8-k-1].im, z[n8-k-1].re, tsin[n8-k-1], tcos[n8-k-1]);
        CMUL(r1, i0, z[n8+k ].im, z[n8+k ].re, tsin[n8+k ], tcos[n8+k ]);
        z[n8-k-1].re = r0;
        z[n8-k-1].im = i0;
        z[n8+k ].re = r1;
        z[n8+k ].im = i1;
    }
}
172 | |||
173 | /** | ||
174 | * Compute inverse MDCT of size N = 2^nbits | ||
175 | * @param output N samples | ||
176 | * @param input N/2 samples | ||
177 | */ | ||
178 | void ff_imdct_calc_c(MDCTContext *s, FFTSample *output, const FFTSample *input) | ||
179 | { | ||
180 | int k; | ||
181 | int n = 1 << s->nbits; | ||
182 | int n2 = n >> 1; | ||
183 | int n4 = n >> 2; | ||
184 | |||
185 | ff_imdct_half_c(s, output+n4, input); | ||
186 | |||
187 | for(k = 0; k < n4; k++) { | ||
188 | output[k] = -output[n2-k-1]; | ||
189 | output[n-k-1] = output[n2+k]; | ||
190 | } | ||
191 | } | ||
192 | |||
/**
 * Compute MDCT of size N = 2^nbits
 * @param input N samples
 * @param out N/2 samples
 */
void ff_mdct_calc(MDCTContext *s, FFTSample *out, const FFTSample *input)
{
    int i, j, n, n8, n4, n2, n3;
    FFTSample re, im;
    const uint16_t *revtab = s->fft.revtab;
    const FFTSample *tcos = s->tcos;
    const FFTSample *tsin = s->tsin;
    /* The output buffer doubles as the in-place FFT work area. */
    FFTComplex *x = (FFTComplex *)out;

    n = 1 << s->nbits;
    n2 = n >> 1;
    n4 = n >> 2;
    n8 = n >> 3;
    n3 = 3 * n4;

    /* pre rotation: fold the N real inputs into N/4 complex values,
     * twiddle them, and store in bit-reversed order for the in-place FFT.
     * Each loop pass fills two bit-reversed slots: one from the third
     * quarter of the input, one from the first half. */
    for(i=0;i<n8;i++) {
        re = -input[2*i+3*n4] - input[n3-1-2*i];
        im = -input[n4+2*i] + input[n4-1-2*i];
        j = revtab[i];
        CMUL(x[j].re, x[j].im, re, im, -tcos[i], tsin[i]);

        re = input[2*i] - input[n2-1-2*i];
        im = -(input[n2+2*i] + input[n-1-2*i]);
        j = revtab[n8 + i];
        CMUL(x[j].re, x[j].im, re, im, -tcos[n8 + i], tsin[n8 + i]);
    }

    ff_fft_calc(&s->fft, x);

    /* post rotation: twiddle symmetric pairs around the midpoint n8,
     * swapping real/imaginary placement to produce the MDCT layout. */
    for(i=0;i<n8;i++) {
        FFTSample r0, i0, r1, i1;
        CMUL(i1, r0, x[n8-i-1].re, x[n8-i-1].im, -tsin[n8-i-1], -tcos[n8-i-1]);
        CMUL(i0, r1, x[n8+i ].re, x[n8+i ].im, -tsin[n8+i ], -tcos[n8+i ]);
        x[n8-i-1].re = r0;
        x[n8-i-1].im = i0;
        x[n8+i ].re = r1;
        x[n8+i ].im = i1;
    }
}
239 | |||
/* Release the twiddle tables and the embedded FFT context allocated by
 * ff_mdct_init(). Safe after a failed init: av_freep handles NULL and
 * NULLs the pointers. */
av_cold void ff_mdct_end(MDCTContext *s)
{
    av_freep(&s->tcos);
    av_freep(&s->tsin);
    ff_fft_end(&s->fft);
}