From 432e2ecc137d4fb4d9f6ac87cbbc38830a1f3c2c Mon Sep 17 00:00:00 2001 From: Mohamed Tarek Date: Thu, 13 Aug 2009 20:38:59 +0000 Subject: Modify libatrac to use fixed-point arithmetic. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@22298 a1c6a512-1295-4272-9138-f99709370657 --- apps/codecs/libatrac/Makefile.test | 6 +- apps/codecs/libatrac/README.rockbox | 10 +- apps/codecs/libatrac/atrac3.c | 208 +- apps/codecs/libatrac/atrac3data_fixed.h | 145 ++ apps/codecs/libatrac/dsputil.c | 4114 ------------------------------- apps/codecs/libatrac/dsputil.h | 898 ------- apps/codecs/libatrac/fft.c | 374 --- apps/codecs/libatrac/fixp_math.c | 66 + apps/codecs/libatrac/fixp_math.h | 14 + apps/codecs/libatrac/mdct.c | 245 -- 10 files changed, 324 insertions(+), 5756 deletions(-) create mode 100644 apps/codecs/libatrac/atrac3data_fixed.h delete mode 100644 apps/codecs/libatrac/dsputil.c delete mode 100644 apps/codecs/libatrac/dsputil.h delete mode 100644 apps/codecs/libatrac/fft.c create mode 100644 apps/codecs/libatrac/fixp_math.c create mode 100644 apps/codecs/libatrac/fixp_math.h delete mode 100644 apps/codecs/libatrac/mdct.c (limited to 'apps/codecs') diff --git a/apps/codecs/libatrac/Makefile.test b/apps/codecs/libatrac/Makefile.test index 56b50b90fa..d1d35860e0 100644 --- a/apps/codecs/libatrac/Makefile.test +++ b/apps/codecs/libatrac/Makefile.test @@ -1,8 +1,8 @@ -CFLAGS = -Wall -O3 -DTEST -D"DEBUGF=printf" -OBJS = atrac3.o dsputil.o bitstream.o fft.o mdct.o libavutil/log.o libavutil/mem.o ../librm/rm.o +CFLAGS = -Wall -O3 -DTEST -D"DEBUGF=printf" -D"ROCKBOX_LITTLE_ENDIAN=1" -D"ICONST_ATTR=" -D"ICODE_ATTR=" +OBJS = atrac3.o bitstream.o libavutil/log.o libavutil/mem.o ../librm/rm.o fixp_math.o ../lib/mdct2.o ../lib/mdct_lookup.o atractest: $(OBJS) - gcc -o atractest $(OBJS) -lm + gcc -o atractest $(OBJS) .c.o : $(CC) $(CFLAGS) -c -o $@ $< diff --git a/apps/codecs/libatrac/README.rockbox b/apps/codecs/libatrac/README.rockbox index 7f62e10bb4..46738e7d7b 100644 --- a/apps/codecs/libatrac/README.rockbox +++ b/apps/codecs/libatrac/README.rockbox @@ -8,13 +8,19 @@ ffmpeg is licensed under the Lesser GNU General Public License. IMPORT DETAILS -The decoder is based on ffmpeg-svn r18079. It still uses floating -point math and not suitable to be used in rockbox. +The decoder is based on ffmpeg-svn r18079. +The decoder had been modified to use fixed-point arithmetic. TESTING The test program should compile in any Unix-like environment using the command "make -f Makefile.test". +For ARM targets add -DCPU_ARM to CFLAGS in Makefile.test to make use of +the asm ARM optimisations in rockbox's mdct library. + +For Big-endian targets, change -D"ROCKBOX_LITTLE_ENDIAN=1" +to -D"ROCKBOX_BIG_ENDIAN=1" in Makefile.test. + Running "./atractest file.rm" will decode the audio data to a WAV file called "output.wav" in the current directory. 
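
The conversion keeps the decoder's arithmetic in 32-bit fixed point: gains, scale factors and matrixing
coefficients are Q16.16 (scaled by 2^16, with ONE_16 standing for 1.0), while the scalefactor-decoding,
QMF and window tables in atrac3data_fixed.h are scaled by 2^31. The new fixp_math.c/fixp_math.h supply
the primitives used throughout atrac3.c (fixmul16, fixdiv16, fastSqrt); their bodies are not part of the
hunks shown here, so the following is only a sketch of the assumed Q16.16 semantics, not the committed
implementation (fastSqrt is omitted):

    #include <stdint.h>

    #define ONE_16 (1L << 16)              /* 1.0 in Q16.16 */

    /* Q16.16 * Q16.16 -> Q16.16: widen to 64 bits, then drop 16 fraction bits. */
    static inline int32_t fixmul16(int32_t x, int32_t y)
    {
        return (int32_t)(((int64_t)x * y) >> 16);
    }

    /* Q16.16 / Q16.16 -> Q16.16: pre-shift the dividend to keep the fraction bits. */
    static inline int32_t fixdiv16(int32_t x, int32_t y)
    {
        return (int32_t)(((int64_t)x << 16) / y);
    }

With helpers of this shape, a float statement such as pOut[n] = pIn[n] * gain1 + pPrev[n] becomes
pOut[n] = fixmul16(pIn[n], gain1) + pPrev[n], which is the pattern visible in gainCompensateAndOverlap()
and the other atrac3.c hunks below.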
diff --git a/apps/codecs/libatrac/atrac3.c b/apps/codecs/libatrac/atrac3.c index a800511397..838bbca48a 100644 --- a/apps/codecs/libatrac/atrac3.c +++ b/apps/codecs/libatrac/atrac3.c @@ -38,7 +38,6 @@ #include "avcodec.h" #include "bitstream.h" -#include "dsputil.h" #include "bytestream.h" #include @@ -50,6 +49,10 @@ #include "../librm/rm.h" #include "atrac3data.h" +#include "atrac3data_fixed.h" +#include "fixp_math.h" +//#include "fixp_mdct.h" +#include "../lib/mdct2.h" #define JOINT_STEREO 0x12 #define STEREO 0x2 @@ -70,23 +73,23 @@ typedef struct { typedef struct { int pos; int numCoefs; - float coef[8]; + int32_t coef[8]; } tonal_component; typedef struct { int bandsCoded; int numComponents; tonal_component components[64]; - float prevFrame[1024]; + int32_t prevFrame[1024]; int gcBlkSwitch; gain_block gainBlock[2]; - DECLARE_ALIGNED_16(float, spectrum[1024]); - DECLARE_ALIGNED_16(float, IMDCT_buf[1024]); + int32_t spectrum[1024] __attribute__((aligned(16))); + int32_t IMDCT_buf[1024] __attribute__((aligned(16))); - float delayBuf1[46]; ///num_gain_data == 0) - gain1 = 1.0; + gain1 = ONE_16; else gain1 = gain_tab1[pGain2->levcode[0]]; if (pGain1->num_gain_data == 0) { for (cnt = 0; cnt < 256; cnt++) - pOut[cnt] = pIn[cnt] * gain1 + pPrev[cnt]; + pOut[cnt] = fixmul16(pIn[cnt], gain1) + pPrev[cnt]; } else { numdata = pGain1->num_gain_data; pGain1->loccode[numdata] = 32; @@ -570,36 +553,38 @@ static void gainCompensateAndOverlap (float *pIn, float *pPrev, float *pOut, gai /* interpolate */ for (; nsample < startLoc; nsample++) - pOut[nsample] = (pIn[nsample] * gain1 + pPrev[nsample]) * gain2; + pOut[nsample] = fixmul16((fixmul16(pIn[nsample], gain1) + pPrev[nsample]), gain2); /* interpolation is done over eight samples */ for (; nsample < endLoc; nsample++) { - pOut[nsample] = (pIn[nsample] * gain1 + pPrev[nsample]) * gain2; - gain2 *= gain_inc; + pOut[nsample] = fixmul16((fixmul16(pIn[nsample], gain1) + pPrev[nsample]),gain2); + gain2 = fixmul16(gain2, gain_inc); } } for (; nsample < 256; nsample++) - pOut[nsample] = (pIn[nsample] * gain1) + pPrev[nsample]; + pOut[nsample] = fixmul16(pIn[nsample], gain1) + pPrev[nsample]; } /* Delay for the overlapping part. 
*/ - memcpy(pPrev, &pIn[256], 256*sizeof(float)); + memcpy(pPrev, &pIn[256], 256*sizeof(int32_t)); } /** * Combine the tonal band spectrum and regular band spectrum * Return position of the last tonal coefficient + * * @param pSpectrum output spectrum buffer * @param numComponents amount of tonal components * @param pComponent tonal components for this band */ -static int addTonalComponents (float *pSpectrum, int numComponents, tonal_component *pComponent) +static int addTonalComponents (int32_t *pSpectrum, int numComponents, tonal_component *pComponent) { int cnt, i, lastPos = -1; - float *pIn, *pOut; + int32_t *pOut; + int32_t *pIn; for (cnt = 0; cnt < numComponents; cnt++){ lastPos = FFMAX(pComponent[cnt].pos + pComponent[cnt].numCoefs, lastPos); @@ -614,13 +599,13 @@ static int addTonalComponents (float *pSpectrum, int numComponents, tonal_compon } -#define INTERPOLATE(old,new,nsample) ((old) + (nsample)*0.125*((new)-(old))) +#define INTERPOLATE(old,new,nsample) ((old*ONE_16) + fixmul16(((nsample*ONE_16)>>3), (((new) - (old))*ONE_16))) -static void reverseMatrixing(float *su1, float *su2, int *pPrevCode, int *pCurrCode) +static void reverseMatrixing(int32_t *su1, int32_t *su2, int *pPrevCode, int *pCurrCode) { int i, band, nsample, s1, s2; - float c1, c2; - float mc1_l, mc1_r, mc2_l, mc2_r; + int32_t c1, c2; + int32_t mc1_l, mc1_r, mc2_l, mc2_r; for (i=0,band = 0; band < 4*256; band+=256,i++) { s1 = pPrevCode[i]; @@ -629,18 +614,18 @@ static void reverseMatrixing(float *su1, float *su2, int *pPrevCode, int *pCurrC if (s1 != s2) { /* Selector value changed, interpolation needed. */ - mc1_l = matrixCoeffs[s1*2]; - mc1_r = matrixCoeffs[s1*2+1]; - mc2_l = matrixCoeffs[s2*2]; - mc2_r = matrixCoeffs[s2*2+1]; + mc1_l = matrixCoeffs_fix[s1<<1]; + mc1_r = matrixCoeffs_fix[(s1<<1)+1]; + mc2_l = matrixCoeffs_fix[s2<<1]; + mc2_r = matrixCoeffs_fix[(s2<<1)+1]; /* Interpolation is done over the first eight samples. 
*/ for(; nsample < 8; nsample++) { c1 = su1[band+nsample]; c2 = su2[band+nsample]; - c2 = c1 * INTERPOLATE(mc1_l,mc2_l,nsample) + c2 * INTERPOLATE(mc1_r,mc2_r,nsample); + c2 = fixmul16(c1, INTERPOLATE(mc1_l, mc2_l, nsample)) + fixmul16(c2, INTERPOLATE(mc1_r, mc2_r, nsample)); su1[band+nsample] = c2; - su2[band+nsample] = c1 * 2.0 - c2; + su2[band+nsample] = (c1 << 1) - c2; } } @@ -650,8 +635,8 @@ static void reverseMatrixing(float *su1, float *su2, int *pPrevCode, int *pCurrC for (; nsample < 256; nsample++) { c1 = su1[band+nsample]; c2 = su2[band+nsample]; - su1[band+nsample] = c2 * 2.0; - su2[band+nsample] = (c1 - c2) * 2.0; + su1[band+nsample] = c2 << 1; + su2[band+nsample] = (c1 - c2) << 1; } break; @@ -659,8 +644,8 @@ static void reverseMatrixing(float *su1, float *su2, int *pPrevCode, int *pCurrC for (; nsample < 256; nsample++) { c1 = su1[band+nsample]; c2 = su2[band+nsample]; - su1[band+nsample] = (c1 + c2) * 2.0; - su2[band+nsample] = c2 * -2.0; + su1[band+nsample] = (c1 + c2) << 1; + su2[band+nsample] = -1*(c2 << 1); } break; case 2: @@ -678,24 +663,23 @@ static void reverseMatrixing(float *su1, float *su2, int *pPrevCode, int *pCurrC } } -static void getChannelWeights (int indx, int flag, float ch[2]){ - +static void getChannelWeights (int indx, int flag, int32_t ch[2]){ if (indx == 7) { - ch[0] = 1.0; - ch[1] = 1.0; + ch[0] = ONE_16; + ch[1] = ONE_16; } else { - ch[0] = (float)(indx & 7) / 7.0; - ch[1] = sqrt(2 - ch[0]*ch[0]); + ch[0] = fixdiv16(((indx & 7)*ONE_16), 7*ONE_16); + ch[1] = fastSqrt((ONE_16 << 1) - fixmul16(ch[0], ch[0])); if(flag) - FFSWAP(float, ch[0], ch[1]); + FFSWAP(int32_t, ch[0], ch[1]); } } -static void channelWeighting (float *su1, float *su2, int *p3) +static void channelWeighting (int32_t *su1, int32_t *su2, int *p3) { int band, nsample; /* w[x][y] y=0 is left y=1 is right */ - float w[2][2]; + int32_t w[2][2]; if (p3[1] != 7 || p3[3] != 7){ getChannelWeights(p3[1], p3[0], w[0]); @@ -704,13 +688,13 @@ static void channelWeighting (float *su1, float *su2, int *p3) for(band = 1; band < 4; band++) { /* scale the channels by the weights */ for(nsample = 0; nsample < 8; nsample++) { - su1[band*256+nsample] *= INTERPOLATE(w[0][0], w[0][1], nsample); - su2[band*256+nsample] *= INTERPOLATE(w[1][0], w[1][1], nsample); + su1[band*256+nsample] = fixmul16(su1[band*256+nsample], INTERPOLATE(w[0][0], w[0][1], nsample)); + su2[band*256+nsample] = fixmul16(su2[band*256+nsample], INTERPOLATE(w[1][0], w[1][1], nsample)); } for(; nsample < 256; nsample++) { - su1[band*256+nsample] *= w[1][0]; - su2[band*256+nsample] *= w[1][1]; + su1[band*256+nsample] = fixmul16(su1[band*256+nsample], w[1][0]); + su2[band*256+nsample] = fixmul16(su2[band*256+nsample], w[1][1]); } } } @@ -728,10 +712,9 @@ static void channelWeighting (float *su1, float *su2, int *p3) */ -static int decodeChannelSoundUnit (ATRAC3Context *q, GetBitContext *gb, channel_unit *pSnd, float *pOut, int channelNum, int codingMode) +static int decodeChannelSoundUnit (ATRAC3Context *q, GetBitContext *gb, channel_unit *pSnd, int32_t *pOut, int channelNum, int codingMode) { int band, result=0, numSubbands, lastTonal, numBands; - if (codingMode == JOINT_STEREO && channelNum == 1) { if (get_bits(gb,2) != 3) { av_log(NULL,AV_LOG_ERROR,"JS mono Sound Unit id != 3.\n"); @@ -771,7 +754,7 @@ static int decodeChannelSoundUnit (ATRAC3Context *q, GetBitContext *gb, channel_ if (band <= numBands) { IMLT(&(pSnd->spectrum[band*256]), pSnd->IMDCT_buf, band&1); } else - memset(pSnd->IMDCT_buf, 0, 512 * sizeof(float)); + 
memset(pSnd->IMDCT_buf, 0, 512 * sizeof(int32_t)); /* gain compensation and overlapping */ gainCompensateAndOverlap (pSnd->IMDCT_buf, &(pSnd->prevFrame[band*256]), &(pOut[band*256]), @@ -795,7 +778,7 @@ static int decodeChannelSoundUnit (ATRAC3Context *q, GetBitContext *gb, channel_ static int decodeFrame(ATRAC3Context *q, const uint8_t* databuf) { int result, i; - float *p1, *p2, *p3, *p4; + int32_t *p1, *p2, *p3, *p4; uint8_t *ptr1; if (q->codingMode == JOINT_STEREO) { @@ -893,7 +876,6 @@ static int decodeFrame(ATRAC3Context *q, const uint8_t* databuf) static int atrac3_decode_frame(RMContext *rmctx, ATRAC3Context *q, void *data, int *data_size, const uint8_t *buf, int buf_size) { - //ATRAC3Context *q = rmctx->priv_data; int result = 0, i; const uint8_t* databuf; int16_t* samples = data; @@ -919,13 +901,13 @@ static int atrac3_decode_frame(RMContext *rmctx, ATRAC3Context *q, if (q->channels == 1) { /* mono */ for (i = 0; i<1024; i++) - samples[i] = av_clip_int16(round(q->outSamples[i])); + samples[i] = av_clip_int16(q->outSamples[i]); *data_size = 1024 * sizeof(int16_t); } else { /* stereo */ for (i = 0; i < 1024; i++) { - samples[i*2] = av_clip_int16(round(q->outSamples[i])); - samples[i*2+1] = av_clip_int16(round(q->outSamples[1024+i])); + samples[i*2] = av_clip_int16(q->outSamples[i]); + samples[i*2+1] = av_clip_int16(q->outSamples[1024+i]); } *data_size = 2048 * sizeof(int16_t); } @@ -944,7 +926,6 @@ static av_cold int atrac3_decode_init(ATRAC3Context *q, RMContext *rmctx) { int i; const uint8_t *edata_ptr = rmctx->codec_extradata; - //ATRAC3Context *q = rmctx->priv_data; static VLC_TYPE atrac3_vlc_table[4096][2]; static int vlcs_initialized = 0; @@ -1051,17 +1032,6 @@ static av_cold int atrac3_decode_init(ATRAC3Context *q, RMContext *rmctx) init_atrac3_transforms(q); - /* Generate the scale factors. */ - for (i=0 ; i<64 ; i++) - SFTable[i] = pow(2.0, (i - 15) / 3.0); - - /* Generate gain tables. 
*/ - for (i=0 ; i<16 ; i++) - gain_tab1[i] = powf (2.0, (4 - i)); - - for (i=-15 ; i<16 ; i++) - gain_tab2[i+15] = powf (2.0, i * -0.125); - /* init the joint-stereo decoding data */ q->weighting_delay[0] = 0; q->weighting_delay[1] = 7; @@ -1076,8 +1046,6 @@ static av_cold int atrac3_decode_init(ATRAC3Context *q, RMContext *rmctx) q->matrix_coeff_index_next[i] = 3; } - dsputil_init(&dsp); - q->pUnits = av_mallocz(sizeof(channel_unit)*q->channels); if (!q->pUnits) { av_free(q->decoded_bytes_buffer); diff --git a/apps/codecs/libatrac/atrac3data_fixed.h b/apps/codecs/libatrac/atrac3data_fixed.h new file mode 100644 index 0000000000..8dbc952c2f --- /dev/null +++ b/apps/codecs/libatrac/atrac3data_fixed.h @@ -0,0 +1,145 @@ + +/* tables for the scalefactor decoding */ +/* scaled by 2^31*/ +static const int32_t iMaxQuant_fix[8] = { + 0x0, 0x55555580, 0x33333340, 0x24924940, 0x1c71c720, 0x11111120, 0x8421080, + 0x4104108 +}; + +/* scaled by 2^16 */ +static const int32_t SFTable_fixed[64] = { + 0x00000800, 0x00000a14, 0x00000cb3, 0x00001000, 0x00001429, 0x00001966, + 0x00002000, 0x00002851, 0x000032cc, 0x00004000, 0x000050a3, 0x00006598, + 0x00008000, 0x0000a145, 0x0000cb30, 0x00010000, 0x0001428a, 0x00019660, + 0x00020000, 0x00028514, 0x00032cc0, 0x00040000, 0x00050a29, 0x00065980, + 0x00080000, 0x000a1452, 0x000cb2ff, 0x00100000, 0x001428a3, 0x001965ff, + 0x00200000, 0x00285146, 0x0032cbfd, 0x00400000, 0x0050a28c, 0x006597fb, + 0x00800000, 0x00a14518, 0x00cb2ff5, 0x01000000, 0x01428a30, 0x01965fea, + 0x02000000, 0x02851460, 0x032cbfd4, 0x04000000, 0x050a28c0, 0x06597fa8, + 0x08000000, 0x0a145180, 0x0cb2ff50, 0x10000000, 0x1428a300, 0x1965fea0, + 0x20000000, 0x28514600, 0x32cbfd40, 0x40000000, 0x50a28c00, 0x6597fa80, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, +}; + +/* transform data */ +/* floating point values scaled by 2^31 */ +static const int32_t qmf_48tap_half_fix[24] = { + 0xffff855e, 0xfffcfbca, 0xfffe28eb, 0x9de6b, 0x7f028, 0xffe40d08, + 0xffeef140, 0x42a692, 0x19ab1f, 0xff75dec7, 0xffe738f5, 0x100e928, + 0xfffdfedf, 0xfe478b84, 0x50b279, 0x2c83f88, 0xff005ad7, 0xfba2ee80, + 0x2685970, 0x6f42798, 0xfa6b6f10, 0xf3475f80, 0x10e7f7c0, 0x3b6c44c0 +}; + +/* mdct window scaled by 2^31 */ +static const int32_t window_lookup[512] = { + 0xffffb10c, 0xfffd394b, 0xfff8494f, 0xfff0e025, 0xffe6fc5f, 0xffda9c15, + 0xffcbbce6, 0xffba5bf4, 0xffa675e8, 0xff9006f0, 0xff770aba, 0xff5b7c7e, + 0xff3d56f2, 0xff1c9452, 0xfef92e59, 0xfed31e45, 0xfeaa5cd5, 0xfe7ee247, + 0xfe50a657, 0xfe1fa041, 0xfdebc6c1, 0xfdb5100d, 0xfd7b71d5, 0xfd3ee149, + 0xfcff5311, 0xfcbcbb49, 0xfc770d99, 0xfc2e3d15, 0xfbe23c39, 0xfb92fd29, + 0xfb407141, 0xfaea8989, 0xfa913661, 0xfa3467b1, 0xf9d40cd9, 0xf9701499, + 0xf9086d41, 0xf89d04a9, 0xf82dc7f1, 0xf7baa3e1, 0xf74384b1, 0xf6c85611, + 0xf6490321, 0xf5c576b1, 0xf53d9b21, 0xf4b15a01, 0xf4209ce1, 0xf38b4c71, + 0xf2f15171, 0xf2529411, 0xf1aefbf1, 0xf10670a1, 0xf058d941, 0xefa61cc1, + 0xeeee21c1, 0xee30cec1, 0xed6e0a41, 0xeca5ba61, 0xebd7c5c1, 0xeb041241, + 0xea2a8601, 0xe94b0861, 0xe8657f61, 0xe779d241, 0xe687e861, 0xe58fa9e1, + 0xe490fec1, 0xe38bd101, 0xe28009c1, 0xe16d93e1, 0xe0545ba1, 0xdf344dc1, + 0xde0d5881, 0xdcdf6bc1, 0xdbaa7801, 0xda6e70c1, 0xd92b4ac1, 0xd7e0fc81, + 0xd68f7ec1, 0xd536cd41, 0xd3d6e5c1, 0xd26fc901, 0xd10179c1, 0xcf8bff41, + 0xce0f6301, 0xcc8bb241, 0xcb00fdc1, 0xc96f5b01, 0xc7d6e141, 0xc637af41, + 0xc491e4c1, 0xc2e5a801, 0xc1332401, 0xbf7a8701, 0xbdbc0681, 0xbbf7da01, + 0xba2e4181, 0xb85f7f81, 0xb68bde01, 0xb4b3a981, 0xb2d73781, 0xb0f6df01, + 0xaf12ff01, 
0xad2bfa81, 0xab423981, 0xa9562981, 0xa7683c01, 0xa578e701, + 0xa388a681, 0xa197f801, 0x9fa75e81, 0x9db75f01, 0x9bc88201, 0x99db5301, + 0x97f06001, 0x96083601, 0x94236601, 0x92427f81, 0x90661481, 0x8e8eb481, + 0x8cbced01, 0x8af14d81, 0x892c5f81, 0x876eab01, 0x85b8b681, 0x840b0301, + 0x82660c01, 0x80ca4a01, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80ca4a01, 0x82660c01, 0x840b0301, 0x85b8b681, 0x876eab01, 0x892c5f81, + 0x8af14d81, 
0x8cbced01, 0x8e8eb481, 0x90661481, 0x92427f81, 0x94236601, + 0x96083601, 0x97f06001, 0x99db5301, 0x9bc88201, 0x9db75f01, 0x9fa75e81, + 0xa197f801, 0xa388a681, 0xa578e701, 0xa7683c01, 0xa9562981, 0xab423981, + 0xad2bfa81, 0xaf12ff01, 0xb0f6df01, 0xb2d73781, 0xb4b3a981, 0xb68bde01, + 0xb85f7f81, 0xba2e4181, 0xbbf7da01, 0xbdbc0681, 0xbf7a8701, 0xc1332401, + 0xc2e5a801, 0xc491e4c1, 0xc637af41, 0xc7d6e141, 0xc96f5b01, 0xcb00fdc1, + 0xcc8bb241, 0xce0f6301, 0xcf8bff41, 0xd10179c1, 0xd26fc901, 0xd3d6e5c1, + 0xd536cd41, 0xd68f7ec1, 0xd7e0fc81, 0xd92b4ac1, 0xda6e70c1, 0xdbaa7801, + 0xdcdf6bc1, 0xde0d5881, 0xdf344dc1, 0xe0545ba1, 0xe16d93e1, 0xe28009c1, + 0xe38bd101, 0xe490fec1, 0xe58fa9e1, 0xe687e861, 0xe779d241, 0xe8657f61, + 0xe94b0861, 0xea2a8601, 0xeb041241, 0xebd7c5c1, 0xeca5ba61, 0xed6e0a41, + 0xee30cec1, 0xeeee21c1, 0xefa61cc1, 0xf058d941, 0xf10670a1, 0xf1aefbf1, + 0xf2529411, 0xf2f15171, 0xf38b4c71, 0xf4209ce1, 0xf4b15a01, 0xf53d9b21, + 0xf5c576b1, 0xf6490321, 0xf6c85611, 0xf74384b1, 0xf7baa3e1, 0xf82dc7f1, + 0xf89d04a9, 0xf9086d41, 0xf9701499, 0xf9d40cd9, 0xfa3467b1, 0xfa913661, + 0xfaea8989, 0xfb407141, 0xfb92fd29, 0xfbe23c39, 0xfc2e3d15, 0xfc770d99, + 0xfcbcbb49, 0xfcff5311, 0xfd3ee149, 0xfd7b71d5, 0xfdb5100d, 0xfdebc6c1, + 0xfe1fa041, 0xfe50a657, 0xfe7ee247, 0xfeaa5cd5, 0xfed31e45, 0xfef92e59, + 0xff1c9452, 0xff3d56f2, 0xff5b7c7e, 0xff770aba, 0xff9006f0, 0xffa675e8, + 0xffba5bf4, 0xffcbbce6, 0xffda9c15, 0xffe6fc5f, 0xfff0e025, 0xfff8494f, + 0xfffd394b, 0xffffb10c, +}; + +/* Gain tables scaled by 2^16 */ +static const int32_t gain_tab1[16] = { + 0x00100000, 0x00080000, 0x00040000, 0x00020000, 0x00010000, 0x00008000, + 0x00004000, 0x00002000, 0x00001000, 0x00000800, 0x00000400, 0x00000200, + 0x00000100, 0x00000080, 0x00000040, 0x00000020, +}; + +static const int32_t gain_tab2[31] = { + 0x0003ab03, 0x00035d14, 0x0003159d, 0x0002d414, 0x000297fb, 0x000260e0, + 0x00022e57, 0x00020000, 0x0001d582, 0x0001ae8a, 0x00018ace, 0x00016a0a, + 0x00014bfe, 0x00013070, 0x0001172c, 0x00010000, 0x0000eac1, 0x0000d745, + 0x0000c567, 0x0000b505, 0x0000a5ff, 0x00009838, 0x00008b96, 0x00008000, + 0x00007560, 0x00006ba2, 0x000062b4, 0x00005a82, 0x000052ff, 0x00004c1c, + 0x000045cb, + +}; + +/* Joint-Stereo related tables, scaled by 2^16 */ +static const int32_t matrixCoeffs_fix[8] = { + 0x00000000, 0x00020000, 0x00020000, 0x00020000, + 0x00000000, 0x00000000, 0x00010000, 0x00010000, +}; + diff --git a/apps/codecs/libatrac/dsputil.c b/apps/codecs/libatrac/dsputil.c deleted file mode 100644 index 412a934862..0000000000 --- a/apps/codecs/libatrac/dsputil.c +++ /dev/null @@ -1,4114 +0,0 @@ -/* - * DSP utils - * Copyright (c) 2000, 2001 Fabrice Bellard - * Copyright (c) 2002-2004 Michael Niedermayer - * - * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -/** - * @file libavcodec/dsputil.c - * DSP utils - */ - -#include "avcodec.h" -#include "dsputil.h" -/*#include "simple_idct.h" -#include "faandct.h" -#include "faanidct.h" -#include "mathops.h" -#include "h263.h" -#include "snow.h" */ - -/* snow.c */ -void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count); - -/* vorbis.c */ -void vorbis_inverse_coupling(float *mag, float *ang, int blocksize); - -/* ac3dec.c */ -void ff_ac3_downmix_c(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len); - -/* flacenc.c */ -void ff_flac_compute_autocorr(const int32_t *data, int len, int lag, double *autoc); - -/* pngdec.c */ -void ff_add_png_paeth_prediction(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp); - -/* eaidct.c */ -void ff_ea_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block); - -uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, }; -uint32_t ff_squareTbl[512] = {0, }; - -// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size -#define pb_7f (~0UL/255 * 0x7f) -#define pb_80 (~0UL/255 * 0x80) - -const uint8_t ff_zigzag_direct[64] = { - 0, 1, 8, 16, 9, 2, 3, 10, - 17, 24, 32, 25, 18, 11, 4, 5, - 12, 19, 26, 33, 40, 48, 41, 34, - 27, 20, 13, 6, 7, 14, 21, 28, - 35, 42, 49, 56, 57, 50, 43, 36, - 29, 22, 15, 23, 30, 37, 44, 51, - 58, 59, 52, 45, 38, 31, 39, 46, - 53, 60, 61, 54, 47, 55, 62, 63 -}; - -/* Specific zigzag scan for 248 idct. NOTE that unlike the - specification, we interleave the fields */ -const uint8_t ff_zigzag248_direct[64] = { - 0, 8, 1, 9, 16, 24, 2, 10, - 17, 25, 32, 40, 48, 56, 33, 41, - 18, 26, 3, 11, 4, 12, 19, 27, - 34, 42, 49, 57, 50, 58, 35, 43, - 20, 28, 5, 13, 6, 14, 21, 29, - 36, 44, 51, 59, 52, 60, 37, 45, - 22, 30, 7, 15, 23, 31, 38, 46, - 53, 61, 54, 62, 39, 47, 55, 63, -}; - -/* not permutated inverse zigzag_direct + 1 for MMX quantizer */ -DECLARE_ALIGNED_8(uint16_t, inv_zigzag_direct16[64]) = {0, }; - -const uint8_t ff_alternate_horizontal_scan[64] = { - 0, 1, 2, 3, 8, 9, 16, 17, - 10, 11, 4, 5, 6, 7, 15, 14, - 13, 12, 19, 18, 24, 25, 32, 33, - 26, 27, 20, 21, 22, 23, 28, 29, - 30, 31, 34, 35, 40, 41, 48, 49, - 42, 43, 36, 37, 38, 39, 44, 45, - 46, 47, 50, 51, 56, 57, 58, 59, - 52, 53, 54, 55, 60, 61, 62, 63, -}; - -const uint8_t ff_alternate_vertical_scan[64] = { - 0, 8, 16, 24, 1, 9, 2, 10, - 17, 25, 32, 40, 48, 56, 57, 49, - 41, 33, 26, 18, 3, 11, 4, 12, - 19, 27, 34, 42, 50, 58, 35, 43, - 51, 59, 20, 28, 5, 13, 6, 14, - 21, 29, 36, 44, 52, 60, 37, 45, - 53, 61, 22, 30, 7, 15, 23, 31, - 38, 46, 54, 62, 39, 47, 55, 63, -}; - -/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */ -const uint32_t ff_inverse[256]={ - 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757, - 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154, - 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709, - 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333, - 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367, - 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283, - 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315, 
- 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085, - 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498, - 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675, - 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441, - 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183, - 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712, - 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400, - 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163, - 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641, - 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573, - 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737, - 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493, - 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373, - 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368, - 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671, - 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767, - 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740, - 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751, - 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635, - 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593, - 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944, - 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933, - 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575, - 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532, - 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010, -}; - -/* Input permutation for the simple_idct_mmx */ -static const uint8_t simple_mmx_permutation[64]={ - 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, - 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, - 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, - 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, - 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, - 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, - 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, - 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, -}; - -static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7}; - -void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){ - int i; - int end; - - st->scantable= src_scantable; - - for(i=0; i<64; i++){ - int j; - j = src_scantable[i]; - st->permutated[i] = permutation[j]; -#if ARCH_PPC - st->inverse[j] = i; -#endif - } - - end=-1; - for(i=0; i<64; i++){ - int j; - j = st->permutated[i]; - if(j>end) end=j; - st->raster_end[i]= end; - } -} - -#if CONFIG_SNOW_ENCODER //dwt is in snow.c -static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){ - int s, i, j; - const int dec_count= w==8 ? 
3 : 4; - int tmp[32*32]; - int level, ori; - static const int scale[2][2][4][4]={ - { - { - // 9/7 8x8 dec=3 - {268, 239, 239, 213}, - { 0, 224, 224, 152}, - { 0, 135, 135, 110}, - },{ - // 9/7 16x16 or 32x32 dec=4 - {344, 310, 310, 280}, - { 0, 320, 320, 228}, - { 0, 175, 175, 136}, - { 0, 129, 129, 102}, - } - },{ - { - // 5/3 8x8 dec=3 - {275, 245, 245, 218}, - { 0, 230, 230, 156}, - { 0, 138, 138, 113}, - },{ - // 5/3 16x16 or 32x32 dec=4 - {352, 317, 317, 286}, - { 0, 328, 328, 233}, - { 0, 180, 180, 140}, - { 0, 132, 132, 105}, - } - } - }; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j+=4) { - tmp[32*i+j+0] = (pix1[j+0] - pix2[j+0])<<4; - tmp[32*i+j+1] = (pix1[j+1] - pix2[j+1])<<4; - tmp[32*i+j+2] = (pix1[j+2] - pix2[j+2])<<4; - tmp[32*i+j+3] = (pix1[j+3] - pix2[j+3])<<4; - } - pix1 += line_size; - pix2 += line_size; - } - - ff_spatial_dwt(tmp, w, h, 32, type, dec_count); - - s=0; - assert(w==h); - for(level=0; level>(dec_count-level); - int sx= (ori&1) ? size : 0; - int stride= 32<<(dec_count-level); - int sy= (ori&2) ? stride>>1 : 0; - - for(i=0; i=0); - return s>>9; -} - -static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ - return w_c(v, pix1, pix2, line_size, 8, h, 1); -} - -static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ - return w_c(v, pix1, pix2, line_size, 8, h, 0); -} - -static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ - return w_c(v, pix1, pix2, line_size, 16, h, 1); -} - -static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ - return w_c(v, pix1, pix2, line_size, 16, h, 0); -} - -int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ - return w_c(v, pix1, pix2, line_size, 32, h, 1); -} - -int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ - return w_c(v, pix1, pix2, line_size, 32, h, 0); -} -#endif - -/** - * Copies a rectangular area of samples to a temporary buffer and replicates the boarder samples. 
- * @param buf destination buffer - * @param src source buffer - * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers - * @param block_w width of block - * @param block_h height of block - * @param src_x x coordinate of the top left sample of the block in the source buffer - * @param src_y y coordinate of the top left sample of the block in the source buffer - * @param w width of the source buffer - * @param h height of the source buffer - */ -void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h, - int src_x, int src_y, int w, int h){ - int x, y; - int start_y, start_x, end_y, end_x; - - if(src_y>= h){ - src+= (h-1-src_y)*linesize; - src_y=h-1; - }else if(src_y<=-block_h){ - src+= (1-block_h-src_y)*linesize; - src_y=1-block_h; - } - if(src_x>= w){ - src+= (w-1-src_x); - src_x=w-1; - }else if(src_x<=-block_w){ - src+= (1-block_w-src_x); - src_x=1-block_w; - } - - start_y= FFMAX(0, -src_y); - start_x= FFMAX(0, -src_x); - end_y= FFMIN(block_h, h-src_y); - end_x= FFMIN(block_w, w-src_x); - - // copy existing part - for(y=start_y; y 127) - *pixels = 255; - else - *pixels = (uint8_t)(*block + 128); - block++; - pixels++; - } - pixels += (line_size - 8); - } -} - -static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, - int line_size) -{ - int i; - uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; - - /* read the pixels */ - for(i=0;i<8;i++) { - pixels[0] = cm[pixels[0] + block[0]]; - pixels[1] = cm[pixels[1] + block[1]]; - pixels[2] = cm[pixels[2] + block[2]]; - pixels[3] = cm[pixels[3] + block[3]]; - pixels[4] = cm[pixels[4] + block[4]]; - pixels[5] = cm[pixels[5] + block[5]]; - pixels[6] = cm[pixels[6] + block[6]]; - pixels[7] = cm[pixels[7] + block[7]]; - pixels += line_size; - block += 8; - } -} - -static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, - int line_size) -{ - int i; - uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; - - /* read the pixels */ - for(i=0;i<4;i++) { - pixels[0] = cm[pixels[0] + block[0]]; - pixels[1] = cm[pixels[1] + block[1]]; - pixels[2] = cm[pixels[2] + block[2]]; - pixels[3] = cm[pixels[3] + block[3]]; - pixels += line_size; - block += 8; - } -} - -static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels, - int line_size) -{ - int i; - uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; - - /* read the pixels */ - for(i=0;i<2;i++) { - pixels[0] = cm[pixels[0] + block[0]]; - pixels[1] = cm[pixels[1] + block[1]]; - pixels += line_size; - block += 8; - } -} - -static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size) -{ - int i; - for(i=0;i<8;i++) { - pixels[0] += block[0]; - pixels[1] += block[1]; - pixels[2] += block[2]; - pixels[3] += block[3]; - pixels[4] += block[4]; - pixels[5] += block[5]; - pixels[6] += block[6]; - pixels[7] += block[7]; - pixels += line_size; - block += 8; - } -} - -static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size) -{ - int i; - for(i=0;i<4;i++) { - pixels[0] += block[0]; - pixels[1] += block[1]; - pixels[2] += block[2]; - pixels[3] += block[3]; - pixels += line_size; - block += 4; - } -} - -static int sum_abs_dctelem_c(DCTELEM *block) -{ - int sum=0, i; - for(i=0; i<64; i++) - sum+= FFABS(block[i]); - return sum; -} - -#if 0 - -#define PIXOP2(OPNAME, OP) \ -static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int i;\ - for(i=0; i>1));\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ 
-\ -static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int i;\ - for(i=0; i>1));\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -\ -static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int i;\ - for(i=0; i>1));\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -\ -static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int i;\ - for(i=0; i>1));\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -\ -static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int i;\ - const uint64_t a= AV_RN64(pixels );\ - const uint64_t b= AV_RN64(pixels+1);\ - uint64_t l0= (a&0x0303030303030303ULL)\ - + (b&0x0303030303030303ULL)\ - + 0x0202020202020202ULL;\ - uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ - + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ - uint64_t l1,h1;\ -\ - pixels+=line_size;\ - for(i=0; i>2)\ - + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ - OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ - pixels+=line_size;\ - block +=line_size;\ - a= AV_RN64(pixels );\ - b= AV_RN64(pixels+1);\ - l0= (a&0x0303030303030303ULL)\ - + (b&0x0303030303030303ULL)\ - + 0x0202020202020202ULL;\ - h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ - + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ - OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -\ -static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int i;\ - const uint64_t a= AV_RN64(pixels );\ - const uint64_t b= AV_RN64(pixels+1);\ - uint64_t l0= (a&0x0303030303030303ULL)\ - + (b&0x0303030303030303ULL)\ - + 0x0101010101010101ULL;\ - uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ - + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ - uint64_t l1,h1;\ -\ - pixels+=line_size;\ - for(i=0; i>2)\ - + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ - OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ - pixels+=line_size;\ - block +=line_size;\ - a= AV_RN64(pixels );\ - b= AV_RN64(pixels+1);\ - l0= (a&0x0303030303030303ULL)\ - + (b&0x0303030303030303ULL)\ - + 0x0101010101010101ULL;\ - h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ - + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ - OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -\ -CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8)\ -CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\ -CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\ -CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\ -CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\ -CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\ -CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8) - -#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) ) -#else // 64 bit variant - -#define PIXOP2(OPNAME, OP) \ -static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ - int i;\ - for(i=0; i>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - l1= (c&0x03030303UL)\ - + (d&0x03030303UL);\ - h1= ((c&0xFCFCFCFCUL)>>2)\ - + ((d&0xFCFCFCFCUL)>>2);\ - OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ - a= AV_RN32(&src1[i*src_stride1+4]);\ - b= AV_RN32(&src2[i*src_stride2+4]);\ - c= 
AV_RN32(&src3[i*src_stride3+4]);\ - d= AV_RN32(&src4[i*src_stride4+4]);\ - l0= (a&0x03030303UL)\ - + (b&0x03030303UL)\ - + 0x02020202UL;\ - h0= ((a&0xFCFCFCFCUL)>>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - l1= (c&0x03030303UL)\ - + (d&0x03030303UL);\ - h1= ((c&0xFCFCFCFCUL)>>2)\ - + ((d&0xFCFCFCFCUL)>>2);\ - OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ - }\ -}\ -\ -static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ - OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ -}\ -\ -static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ - OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ -}\ -\ -static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ - OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ -}\ -\ -static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ - OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ -}\ -\ -static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ - int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ - int i;\ - for(i=0; i>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - l1= (c&0x03030303UL)\ - + (d&0x03030303UL);\ - h1= ((c&0xFCFCFCFCUL)>>2)\ - + ((d&0xFCFCFCFCUL)>>2);\ - OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ - a= AV_RN32(&src1[i*src_stride1+4]);\ - b= AV_RN32(&src2[i*src_stride2+4]);\ - c= AV_RN32(&src3[i*src_stride3+4]);\ - d= AV_RN32(&src4[i*src_stride4+4]);\ - l0= (a&0x03030303UL)\ - + (b&0x03030303UL)\ - + 0x01010101UL;\ - h0= ((a&0xFCFCFCFCUL)>>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - l1= (c&0x03030303UL)\ - + (d&0x03030303UL);\ - h1= ((c&0xFCFCFCFCUL)>>2)\ - + ((d&0xFCFCFCFCUL)>>2);\ - OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ - }\ -}\ -static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ - int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ - OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ - OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ -}\ -static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ - int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ - OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ - OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ -}\ -\ -static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int i, a0, b0, a1, b1;\ - a0= pixels[0];\ - b0= pixels[1] + 2;\ - a0 += b0;\ - b0 += pixels[2];\ -\ - pixels+=line_size;\ - for(i=0; i>2; /* FIXME non put */\ - block[1]= (b1+b0)>>2;\ -\ - pixels+=line_size;\ - block +=line_size;\ -\ - a0= pixels[0];\ - b0= pixels[1] + 2;\ - a0 += b0;\ - b0 += pixels[2];\ -\ - block[0]= 
(a1+a0)>>2;\ - block[1]= (b1+b0)>>2;\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -\ -static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int i;\ - const uint32_t a= AV_RN32(pixels );\ - const uint32_t b= AV_RN32(pixels+1);\ - uint32_t l0= (a&0x03030303UL)\ - + (b&0x03030303UL)\ - + 0x02020202UL;\ - uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - uint32_t l1,h1;\ -\ - pixels+=line_size;\ - for(i=0; i>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ - pixels+=line_size;\ - block +=line_size;\ - a= AV_RN32(pixels );\ - b= AV_RN32(pixels+1);\ - l0= (a&0x03030303UL)\ - + (b&0x03030303UL)\ - + 0x02020202UL;\ - h0= ((a&0xFCFCFCFCUL)>>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -\ -static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int j;\ - for(j=0; j<2; j++){\ - int i;\ - const uint32_t a= AV_RN32(pixels );\ - const uint32_t b= AV_RN32(pixels+1);\ - uint32_t l0= (a&0x03030303UL)\ - + (b&0x03030303UL)\ - + 0x02020202UL;\ - uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - uint32_t l1,h1;\ -\ - pixels+=line_size;\ - for(i=0; i>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ - pixels+=line_size;\ - block +=line_size;\ - a= AV_RN32(pixels );\ - b= AV_RN32(pixels+1);\ - l0= (a&0x03030303UL)\ - + (b&0x03030303UL)\ - + 0x02020202UL;\ - h0= ((a&0xFCFCFCFCUL)>>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ - pixels+=line_size;\ - block +=line_size;\ - }\ - pixels+=4-line_size*(h+1);\ - block +=4-line_size*h;\ - }\ -}\ -\ -static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int j;\ - for(j=0; j<2; j++){\ - int i;\ - const uint32_t a= AV_RN32(pixels );\ - const uint32_t b= AV_RN32(pixels+1);\ - uint32_t l0= (a&0x03030303UL)\ - + (b&0x03030303UL)\ - + 0x01010101UL;\ - uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - uint32_t l1,h1;\ -\ - pixels+=line_size;\ - for(i=0; i>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ - pixels+=line_size;\ - block +=line_size;\ - a= AV_RN32(pixels );\ - b= AV_RN32(pixels+1);\ - l0= (a&0x03030303UL)\ - + (b&0x03030303UL)\ - + 0x01010101UL;\ - h0= ((a&0xFCFCFCFCUL)>>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ - pixels+=line_size;\ - block +=line_size;\ - }\ - pixels+=4-line_size*(h+1);\ - block +=4-line_size*h;\ - }\ -}\ -\ -CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\ -CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\ -CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\ -CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\ -CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\ -CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\ -CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\ -CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\ - -#define op_avg(a, b) a = rnd_avg32(a, b) -#endif -#define op_put(a, b) a = b - -PIXOP2(avg, op_avg) -PIXOP2(put, op_put) -#undef op_avg -#undef op_put - -#define 
avg2(a,b) ((a+b+1)>>1) -#define avg4(a,b,c,d) ((a+b+c+d+2)>>2) - -static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){ - put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h); -} - -static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){ - put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h); -} - -static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder) -{ - const int A=(16-x16)*(16-y16); - const int B=( x16)*(16-y16); - const int C=(16-x16)*( y16); - const int D=( x16)*( y16); - int i; - - for(i=0; i>8; - dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8; - dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8; - dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8; - dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8; - dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8; - dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8; - dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8; - dst+= stride; - src+= stride; - } -} - -void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, - int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height) -{ - int y, vx, vy; - const int s= 1<>16; - src_y= vy>>16; - frac_x= src_x&(s-1); - frac_y= src_y&(s-1); - src_x>>=shift; - src_y>>=shift; - - if((unsigned)src_x < width){ - if((unsigned)src_y < height){ - index= src_x + src_y*stride; - dst[y*stride + x]= ( ( src[index ]*(s-frac_x) - + src[index +1]* frac_x )*(s-frac_y) - + ( src[index+stride ]*(s-frac_x) - + src[index+stride+1]* frac_x )* frac_y - + r)>>(shift*2); - }else{ - index= src_x + av_clip(src_y, 0, height)*stride; - dst[y*stride + x]= ( ( src[index ]*(s-frac_x) - + src[index +1]* frac_x )*s - + r)>>(shift*2); - } - }else{ - if((unsigned)src_y < height){ - index= av_clip(src_x, 0, width) + src_y*stride; - dst[y*stride + x]= ( ( src[index ]*(s-frac_y) - + src[index+stride ]* frac_y )*s - + r)>>(shift*2); - }else{ - index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride; - dst[y*stride + x]= src[index ]; - } - } - - vx+= dxx; - vy+= dyx; - } - ox += dxy; - oy += dyy; - } -} - -static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ - switch(width){ - case 2: put_pixels2_c (dst, src, stride, height); break; - case 4: put_pixels4_c (dst, src, stride, height); break; - case 8: put_pixels8_c (dst, src, stride, height); break; - case 16:put_pixels16_c(dst, src, stride, height); break; - } -} - -static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ - int i,j; - for (i=0; i < height; i++) { - for (j=0; j < width; j++) { - dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11; - } - src += stride; - dst += stride; - } -} - -static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ - int i,j; - for (i=0; i < height; i++) { - for (j=0; j < width; j++) { - dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11; - } - src += stride; - dst += stride; - } -} - -static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ - int i,j; - for (i=0; i < height; i++) { - for (j=0; j < width; j++) { - dst[j] = (683*(2*src[j] + 
src[j+stride] + 1)) >> 11; - } - src += stride; - dst += stride; - } -} - -static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ - int i,j; - for (i=0; i < height; i++) { - for (j=0; j < width; j++) { - dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15; - } - src += stride; - dst += stride; - } -} - -static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ - int i,j; - for (i=0; i < height; i++) { - for (j=0; j < width; j++) { - dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15; - } - src += stride; - dst += stride; - } -} - -static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ - int i,j; - for (i=0; i < height; i++) { - for (j=0; j < width; j++) { - dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11; - } - src += stride; - dst += stride; - } -} - -static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ - int i,j; - for (i=0; i < height; i++) { - for (j=0; j < width; j++) { - dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15; - } - src += stride; - dst += stride; - } -} - -static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ - int i,j; - for (i=0; i < height; i++) { - for (j=0; j < width; j++) { - dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15; - } - src += stride; - dst += stride; - } -} - -static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ - switch(width){ - case 2: avg_pixels2_c (dst, src, stride, height); break; - case 4: avg_pixels4_c (dst, src, stride, height); break; - case 8: avg_pixels8_c (dst, src, stride, height); break; - case 16:avg_pixels16_c(dst, src, stride, height); break; - } -} - -static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ - int i,j; - for (i=0; i < height; i++) { - for (j=0; j < width; j++) { - dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1; - } - src += stride; - dst += stride; - } -} - -static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ - int i,j; - for (i=0; i < height; i++) { - for (j=0; j < width; j++) { - dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1; - } - src += stride; - dst += stride; - } -} - -static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ - int i,j; - for (i=0; i < height; i++) { - for (j=0; j < width; j++) { - dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1; - } - src += stride; - dst += stride; - } -} - -static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ - int i,j; - for (i=0; i < height; i++) { - for (j=0; j < width; j++) { - dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1; - } - src += stride; - dst += stride; - } -} - -static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ - int i,j; - for (i=0; i < height; i++) { - for (j=0; j < width; j++) { - dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 
3*src[j+stride+1] + 6)) >> 15) + 1) >> 1; - } - src += stride; - dst += stride; - } -} - -static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ - int i,j; - for (i=0; i < height; i++) { - for (j=0; j < width; j++) { - dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1; - } - src += stride; - dst += stride; - } -} - -static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ - int i,j; - for (i=0; i < height; i++) { - for (j=0; j < width; j++) { - dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1; - } - src += stride; - dst += stride; - } -} - -static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ - int i,j; - for (i=0; i < height; i++) { - for (j=0; j < width; j++) { - dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1; - } - src += stride; - dst += stride; - } -} -#if 0 -#define TPEL_WIDTH(width)\ -static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ - void put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\ -static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ - void put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\ -static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ - void put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\ -static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ - void put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\ -static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ - void put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\ -static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ - void put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\ -static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ - void put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\ -static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ - void put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\ -static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ - void put_tpel_pixels_mc22_c(dst, src, stride, width, height);} -#endif - -#define H264_CHROMA_MC(OPNAME, OP)\ -static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ - const int A=(8-x)*(8-y);\ - const int B=( x)*(8-y);\ - const int C=(8-x)*( y);\ - const int D=( x)*( y);\ - int i;\ - \ - assert(x<8 && y<8 && x>=0 && y>=0);\ -\ - if(D){\ - for(i=0; i=0 && y>=0);\ -\ - if(D){\ - for(i=0; i=0 && y>=0);\ -\ - if(D){\ - for(i=0; i>6)+1)>>1) -#define op_put(a, b) a = (((b) + 32)>>6) - -H264_CHROMA_MC(put_ , op_put) -H264_CHROMA_MC(avg_ , op_avg) -#undef op_avg -#undef op_put - -static void put_no_rnd_h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){ - const int A=(8-x)*(8-y); - const int B=( x)*(8-y); - const int C=(8-x)*( y); - const int D=( x)*( y); - int i; - - assert(x<8 && y<8 && x>=0 && 
 [... remaining deleted lines of dsputil.c omitted: the MPEG-4/H.264 qpel and
 chroma motion-compensation templates, the WMV2/CAVS/VC-1/RV30/RV40 wrappers,
 the H.261/H.263/H.264 loop filters, the SAD/SSE/Hadamard/DCT comparison
 metrics and ff_set_cmp(), the WMV2 and JPEG reference IDCT wrappers,
 float_to_int16 conversion and the static-table initialisation, none of which
 the ATRAC3 decoder uses. The file ends by registering only the floating-point
 vector primitives: ...]
-void dsputil_init(DSPContext* c)
-{
-    ff_check_alignment();
-
-    c->vector_fmul = vector_fmul_c;
-    c->vector_fmul_reverse = vector_fmul_reverse_c;
-    c->vector_fmul_add_add = ff_vector_fmul_add_add_c;
-    c->vector_fmul_window = ff_vector_fmul_window_c;
-}
-
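For reference, these four function pointers are the only hooks this stripped-down copy of dsputil.c still registered, and they are exactly what the fixed-point port removes. A minimal sketch of roughly what the two windowing primitives computed follows; the signatures mirror the declarations in dsputil.h below, but the bodies are simplified (the deleted FFmpeg versions also take an add_bias argument and assume aligned, multiple-of-8 lengths), and the *_sketch names are illustrative only.

    /* Simplified sketch of the float windowing primitives registered above.
     * The removed FFmpeg versions also carry an add_bias parameter and
     * 16-byte alignment requirements; the names here are illustrative. */
    static void vector_fmul_sketch(float *dst, const float *src, int len)
    {
        int i;
        for (i = 0; i < len; i++)
            dst[i] *= src[i];              /* element-wise window multiply */
    }

    static void vector_fmul_window_sketch(float *dst, const float *prev,
                                          const float *cur, const float *win,
                                          int len)
    {
        /* Windowed overlap-add: roughly, combine the saved half of the
         * previous block (prev, len samples) with the current block
         * (cur, len samples) through a symmetric 2*len-point window. */
        int i;
        for (i = 0; i < len; i++) {
            float w0 = win[i];
            float w1 = win[2 * len - 1 - i];
            dst[i]               = prev[i] * w1 - cur[len - 1 - i] * w0;
            dst[2 * len - 1 - i] = prev[i] * w0 + cur[len - 1 - i] * w1;
        }
    }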
diff --git a/apps/codecs/libatrac/dsputil.h b/apps/codecs/libatrac/dsputil.h
deleted file mode 100644
index 3bb0ff77a5..0000000000
--- a/apps/codecs/libatrac/dsputil.h
+++ /dev/null
@@ -1,898 +0,0 @@
 [... all 898 deleted lines of dsputil.h omitted: the DCTELEM/DWTELEM typedefs
 and DCT/IDCT prototypes, encoding scan tables and crop tables, the op_pixels/
 qpel/me_cmp function-pointer typedefs, the large DSPContext structure (motion
 compensation, loop filters, comparison metrics, IDCT hooks, and the
 vector_fmul and float_to_int16 float primitives), per-architecture
 dsputil_init prototypes and alignment macros, and the floating-point FFT
 (FFTContext), MDCT (MDCTContext) and RDFT declarations, ending with the
 WRAPPER8_16 and copy_block helpers ...]
for K6-2/3 */ - s->imdct_calc = ff_imdct_calc_3dn; - s->imdct_half = ff_imdct_half_3dn; - s->fft_calc = ff_fft_calc_3dn; - } -#elif HAVE_ALTIVEC - has_vectors = mm_support(); - if (has_vectors & FF_MM_ALTIVEC) { - s->fft_calc = ff_fft_calc_altivec; - split_radix = 0; - } -#endif - - if (split_radix) { - for(j=4; j<=nbits; j++) { - int m = 1<revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = i; - s->tmp_buf = av_malloc(n * sizeof(FFTComplex)); - } else { - int np, nblocks, np2, l; - FFTComplex *q; - - for(i=0; i<(n/2); i++) { - alpha = 2 * M_PI * (float)i / (float)n; - c1 = cos(alpha); - s1 = sin(alpha) * s2; - s->exptab[i].re = c1; - s->exptab[i].im = s1; - } - - np = 1 << nbits; - nblocks = np >> 3; - np2 = np >> 1; - s->exptab1 = av_malloc(np * 2 * sizeof(FFTComplex)); - if (!s->exptab1) - goto fail; - q = s->exptab1; - do { - for(l = 0; l < np2; l += 2 * nblocks) { - *q++ = s->exptab[l]; - *q++ = s->exptab[l + nblocks]; - - q->re = -s->exptab[l].im; - q->im = s->exptab[l].re; - q++; - q->re = -s->exptab[l + nblocks].im; - q->im = s->exptab[l + nblocks].re; - q++; - } - nblocks = nblocks >> 1; - } while (nblocks != 0); - av_freep(&s->exptab); - - /* compute bit reverse table */ - for(i=0;i> j) & 1) << (nbits-j-1); - } - s->revtab[i]=m; - } - } - - return 0; - fail: - av_freep(&s->revtab); - av_freep(&s->exptab); - av_freep(&s->exptab1); - av_freep(&s->tmp_buf); - return -1; -} - -void ff_fft_permute_c(FFTContext *s, FFTComplex *z) -{ - int j, k, np; - FFTComplex tmp; - const uint16_t *revtab = s->revtab; - np = 1 << s->nbits; - - if (s->tmp_buf) { - /* TODO: handle split-radix permute in a more optimal way, probably in-place */ - for(j=0;jtmp_buf[revtab[j]] = z[j]; - memcpy(z, s->tmp_buf, np * sizeof(FFTComplex)); - return; - } - - /* reverse */ - for(j=0;jrevtab); - av_freep(&s->exptab); - av_freep(&s->exptab1); - av_freep(&s->tmp_buf); -} - -#define sqrthalf (float)M_SQRT1_2 - -#define BF(x,y,a,b) {\ - x = a - b;\ - y = a + b;\ -} - -#define BUTTERFLIES(a0,a1,a2,a3) {\ - BF(t3, t5, t5, t1);\ - BF(a2.re, a0.re, a0.re, t5);\ - BF(a3.im, a1.im, a1.im, t3);\ - BF(t4, t6, t2, t6);\ - BF(a3.re, a1.re, a1.re, t4);\ - BF(a2.im, a0.im, a0.im, t6);\ -} - -// force loading all the inputs before storing any. -// this is slightly slower for small data, but avoids store->load aliasing -// for addresses separated by large powers of 2. 
-#define BUTTERFLIES_BIG(a0,a1,a2,a3) {\ - FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\ - BF(t3, t5, t5, t1);\ - BF(a2.re, a0.re, r0, t5);\ - BF(a3.im, a1.im, i1, t3);\ - BF(t4, t6, t2, t6);\ - BF(a3.re, a1.re, r1, t4);\ - BF(a2.im, a0.im, i0, t6);\ -} - -#define TRANSFORM(a0,a1,a2,a3,wre,wim) {\ - t1 = a2.re * wre + a2.im * wim;\ - t2 = a2.im * wre - a2.re * wim;\ - t5 = a3.re * wre - a3.im * wim;\ - t6 = a3.im * wre + a3.re * wim;\ - BUTTERFLIES(a0,a1,a2,a3)\ -} - -#define TRANSFORM_ZERO(a0,a1,a2,a3) {\ - t1 = a2.re;\ - t2 = a2.im;\ - t5 = a3.re;\ - t6 = a3.im;\ - BUTTERFLIES(a0,a1,a2,a3)\ -} - -/* z[0...8n-1], w[1...2n-1] */ -#define PASS(name)\ -static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\ -{\ - FFTSample t1, t2, t3, t4, t5, t6;\ - int o1 = 2*n;\ - int o2 = 4*n;\ - int o3 = 6*n;\ - const FFTSample *wim = wre+o1;\ - n--;\ -\ - TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\ - TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\ - do {\ - z += 2;\ - wre += 2;\ - wim -= 2;\ - TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\ - TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\ - } while(--n);\ -} - -PASS(pass) -#undef BUTTERFLIES -#define BUTTERFLIES BUTTERFLIES_BIG -PASS(pass_big) - -#define DECL_FFT(n,n2,n4)\ -static void fft##n(FFTComplex *z)\ -{\ - fft##n2(z);\ - fft##n4(z+n4*2);\ - fft##n4(z+n4*3);\ - pass(z,ff_cos_##n,n4/2);\ -} - -static void fft4(FFTComplex *z) -{ - FFTSample t1, t2, t3, t4, t5, t6, t7, t8; - - BF(t3, t1, z[0].re, z[1].re); - BF(t8, t6, z[3].re, z[2].re); - BF(z[2].re, z[0].re, t1, t6); - BF(t4, t2, z[0].im, z[1].im); - BF(t7, t5, z[2].im, z[3].im); - BF(z[3].im, z[1].im, t4, t8); - BF(z[3].re, z[1].re, t3, t7); - BF(z[2].im, z[0].im, t2, t5); -} - -static void fft8(FFTComplex *z) -{ - FFTSample t1, t2, t3, t4, t5, t6, t7, t8; - - fft4(z); - - BF(t1, z[5].re, z[4].re, -z[5].re); - BF(t2, z[5].im, z[4].im, -z[5].im); - BF(t3, z[7].re, z[6].re, -z[7].re); - BF(t4, z[7].im, z[6].im, -z[7].im); - BF(t8, t1, t3, t1); - BF(t7, t2, t2, t4); - BF(z[4].re, z[0].re, z[0].re, t1); - BF(z[4].im, z[0].im, z[0].im, t2); - BF(z[6].re, z[2].re, z[2].re, t7); - BF(z[6].im, z[2].im, z[2].im, t8); - - TRANSFORM(z[1],z[3],z[5],z[7],sqrthalf,sqrthalf); -} - -#if !CONFIG_SMALL -static void fft16(FFTComplex *z) -{ - FFTSample t1, t2, t3, t4, t5, t6; - - fft8(z); - fft4(z+8); - fft4(z+12); - - TRANSFORM_ZERO(z[0],z[4],z[8],z[12]); - TRANSFORM(z[2],z[6],z[10],z[14],sqrthalf,sqrthalf); - TRANSFORM(z[1],z[5],z[9],z[13],ff_cos_16[1],ff_cos_16[3]); - TRANSFORM(z[3],z[7],z[11],z[15],ff_cos_16[3],ff_cos_16[1]); -} -#else -DECL_FFT(16,8,4) -#endif -DECL_FFT(32,16,8) -DECL_FFT(64,32,16) -DECL_FFT(128,64,32) -DECL_FFT(256,128,64) -DECL_FFT(512,256,128) -#if !CONFIG_SMALL -#define pass pass_big -#endif -DECL_FFT(1024,512,256) -DECL_FFT(2048,1024,512) -DECL_FFT(4096,2048,1024) -DECL_FFT(8192,4096,2048) -DECL_FFT(16384,8192,4096) -DECL_FFT(32768,16384,8192) -DECL_FFT(65536,32768,16384) - -static void (*fft_dispatch[])(FFTComplex*) = { - fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024, - fft2048, fft4096, fft8192, fft16384, fft32768, fft65536, -}; - -void ff_fft_calc_c(FFTContext *s, FFTComplex *z) -{ - fft_dispatch[s->nbits-2](z); -} - diff --git a/apps/codecs/libatrac/fixp_math.c b/apps/codecs/libatrac/fixp_math.c new file mode 100644 index 0000000000..3f578a1ade --- /dev/null +++ b/apps/codecs/libatrac/fixp_math.c @@ -0,0 +1,66 @@ +#include "fixp_math.h" + +inline int32_t fixmul31(int32_t x, int32_t y) +{ + int64_t temp; + temp = x; + temp *= y; + 
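+    /* The 64-bit product of a Q16.16 value and a Q31 fraction carries 16+31
+     * fraction bits; shifting right by 31 (16+31-16) keeps the Q16.16 format. */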
+    temp >>= 31; //16+31-16 = 31 bits
+
+    return (int32_t)temp;
+}
+
+/*
+ * Fast integer square root adapted from an algorithm by
+ * Martin Guy @ UKC, June 1985.
+ * Originally from a book on programming abaci by Mr C. Woo.
+ * This is taken from:
+ * http://wiki.forum.nokia.com/index.php/How_to_use_fixed_point_maths#How_to_get_square_root_for_integers
+ * with an added shift up of the result by 8 bits to return the result in 16.16 fixed-point representation.
+ */
+inline int32_t fastSqrt(int32_t n)
+{
+    /*
+     * Logically, these are unsigned.
+     * We need the sign bit to test
+     * whether (op - res - one) underflowed.
+     */
+    int32_t op, res, one;
+    op = n;
+    res = 0;
+    /* "one" starts at the highest power of four <= the argument. */
+    one = 1 << 30; /* second-to-top bit set */
+    while (one > op) one >>= 2;
+    while (one != 0)
+    {
+        if (op >= res + one)
+        {
+            op = op - (res + one);
+            res = res + (one<<1);
+        }
+        res >>= 1;
+        one >>= 2;
+    }
+    return(res << 8);
+}
+
+inline int32_t fixmul16(int32_t x, int32_t y)
+{
+    int64_t temp;
+    temp = x;
+    temp *= y;
+
+    temp >>= 16;
+
+    return (int32_t)temp;
+}
+
+inline int32_t fixdiv16(int32_t x, int32_t y)
+{
+    int64_t temp;
+    temp = (int64_t)x << 16; /* widen before shifting: x << 16 overflows 32 bits for |x| >= 0.5 */
+    temp /= y;
+
+    return (int32_t)temp;
+}
diff --git a/apps/codecs/libatrac/fixp_math.h b/apps/codecs/libatrac/fixp_math.h
new file mode 100644
index 0000000000..5bfc2c5703
--- /dev/null
+++ b/apps/codecs/libatrac/fixp_math.h
@@ -0,0 +1,14 @@
+#include <stdint.h>
+
+/* Macros for converting between various fixed-point representations and floating point. */
+#define ONE_16        (1L << 16)
+#define fixtof64(x)   (float)((float)(x) / (float)(1 << 16)) //does not work on int64_t!
+#define ftofix32(x)   ((int32_t)((x) * (float)(1 << 16) + ((x) < 0 ? -0.5 : 0.5)))
+#define ftofix31(x)   ((int32_t)((x) * (float)(1U << 31) + ((x) < 0 ? -0.5 : 0.5))) /* 1U: 1 << 31 overflows a signed int */
+#define fix31tof64(x) (float)((float)(x) / (float)(1U << 31))
+
+/* Fixed point math routines for use in atrac3.c */
+inline int32_t fixdiv16(int32_t x, int32_t y);
+inline int32_t fixmul16(int32_t x, int32_t y);
+inline int32_t fixmul31(int32_t x, int32_t y);
+inline int32_t fastSqrt(int32_t n);
diff --git a/apps/codecs/libatrac/mdct.c b/apps/codecs/libatrac/mdct.c
deleted file mode 100644
index 670b6d381e..0000000000
--- a/apps/codecs/libatrac/mdct.c
+++ /dev/null
@@ -1,245 +0,0 @@
-/*
- * MDCT/IMDCT transforms
- * Copyright (c) 2002 Fabrice Bellard
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "dsputil.h" - -#ifndef M_E -#define M_E 2.7182818284590452354 /* e */ -#endif -#ifndef M_LN2 -#define M_LN2 0.69314718055994530942 /* log_e 2 */ -#endif -#ifndef M_LN10 -#define M_LN10 2.30258509299404568402 /* log_e 10 */ -#endif -#ifndef M_PI -#define M_PI 3.14159265358979323846 /* pi */ -#endif -#ifndef M_SQRT1_2 -#define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */ -#endif - -/** - * @file libavcodec/mdct.c - * MDCT/IMDCT transforms. - */ - -// Generate a Kaiser-Bessel Derived Window. -#define BESSEL_I0_ITER 50 // default: 50 iterations of Bessel I0 approximation -av_cold void ff_kbd_window_init(float *window, float alpha, int n) -{ - int i, j; - double sum = 0.0, bessel, tmp; - double local_window[n]; - double alpha2 = (alpha * M_PI / n) * (alpha * M_PI / n); - - for (i = 0; i < n; i++) { - tmp = i * (n - i) * alpha2; - bessel = 1.0; - for (j = BESSEL_I0_ITER; j > 0; j--) - bessel = bessel * tmp / (j * j) + 1; - sum += bessel; - local_window[i] = sum; - } - - sum++; - for (i = 0; i < n; i++) - window[i] = sqrt(local_window[i] / sum); -} - -DECLARE_ALIGNED(16, float, ff_sine_128 [ 128]); -DECLARE_ALIGNED(16, float, ff_sine_256 [ 256]); -DECLARE_ALIGNED(16, float, ff_sine_512 [ 512]); -DECLARE_ALIGNED(16, float, ff_sine_1024[1024]); -DECLARE_ALIGNED(16, float, ff_sine_2048[2048]); -DECLARE_ALIGNED(16, float, ff_sine_4096[4096]); -float *ff_sine_windows[6] = { - ff_sine_128, ff_sine_256, ff_sine_512, ff_sine_1024, ff_sine_2048, ff_sine_4096 -}; - -// Generate a sine window. -av_cold void ff_sine_window_init(float *window, int n) { - int i; - for(i = 0; i < n; i++) - window[i] = sinf((i + 0.5) * (M_PI / (2.0 * n))); -} - -/** - * init MDCT or IMDCT computation. 
- */ -av_cold int ff_mdct_init(MDCTContext *s, int nbits, int inverse) -{ - int n, n4, i; - double alpha; - - memset(s, 0, sizeof(*s)); - n = 1 << nbits; - s->nbits = nbits; - s->n = n; - n4 = n >> 2; - s->tcos = av_malloc(n4 * sizeof(FFTSample)); - if (!s->tcos) - goto fail; - s->tsin = av_malloc(n4 * sizeof(FFTSample)); - if (!s->tsin) - goto fail; - - for(i=0;itcos[i] = -cos(alpha); - s->tsin[i] = -sin(alpha); - } - if (ff_fft_init(&s->fft, s->nbits - 2, inverse) < 0) - goto fail; - return 0; - fail: - av_freep(&s->tcos); - av_freep(&s->tsin); - return -1; -} - -/* complex multiplication: p = a * b */ -#define CMUL(pre, pim, are, aim, bre, bim) \ -{\ - FFTSample _are = (are);\ - FFTSample _aim = (aim);\ - FFTSample _bre = (bre);\ - FFTSample _bim = (bim);\ - (pre) = _are * _bre - _aim * _bim;\ - (pim) = _are * _bim + _aim * _bre;\ -} - -/** - * Compute the middle half of the inverse MDCT of size N = 2^nbits, - * thus excluding the parts that can be derived by symmetry - * @param output N/2 samples - * @param input N/2 samples - */ -void ff_imdct_half_c(MDCTContext *s, FFTSample *output, const FFTSample *input) -{ - int k, n8, n4, n2, n, j; - const uint16_t *revtab = s->fft.revtab; - const FFTSample *tcos = s->tcos; - const FFTSample *tsin = s->tsin; - const FFTSample *in1, *in2; - FFTComplex *z = (FFTComplex *)output; - - n = 1 << s->nbits; - n2 = n >> 1; - n4 = n >> 2; - n8 = n >> 3; - - /* pre rotation */ - in1 = input; - in2 = input + n2 - 1; - for(k = 0; k < n4; k++) { - j=revtab[k]; - CMUL(z[j].re, z[j].im, *in2, *in1, tcos[k], tsin[k]); - in1 += 2; - in2 -= 2; - } - ff_fft_calc(&s->fft, z); - - /* post rotation + reordering */ - output += n4; - for(k = 0; k < n8; k++) { - FFTSample r0, i0, r1, i1; - CMUL(r0, i1, z[n8-k-1].im, z[n8-k-1].re, tsin[n8-k-1], tcos[n8-k-1]); - CMUL(r1, i0, z[n8+k ].im, z[n8+k ].re, tsin[n8+k ], tcos[n8+k ]); - z[n8-k-1].re = r0; - z[n8-k-1].im = i0; - z[n8+k ].re = r1; - z[n8+k ].im = i1; - } -} - -/** - * Compute inverse MDCT of size N = 2^nbits - * @param output N samples - * @param input N/2 samples - */ -void ff_imdct_calc_c(MDCTContext *s, FFTSample *output, const FFTSample *input) -{ - int k; - int n = 1 << s->nbits; - int n2 = n >> 1; - int n4 = n >> 2; - - ff_imdct_half_c(s, output+n4, input); - - for(k = 0; k < n4; k++) { - output[k] = -output[n2-k-1]; - output[n-k-1] = output[n2+k]; - } -} - -/** - * Compute MDCT of size N = 2^nbits - * @param input N samples - * @param out N/2 samples - */ -void ff_mdct_calc(MDCTContext *s, FFTSample *out, const FFTSample *input) -{ - int i, j, n, n8, n4, n2, n3; - FFTSample re, im; - const uint16_t *revtab = s->fft.revtab; - const FFTSample *tcos = s->tcos; - const FFTSample *tsin = s->tsin; - FFTComplex *x = (FFTComplex *)out; - - n = 1 << s->nbits; - n2 = n >> 1; - n4 = n >> 2; - n8 = n >> 3; - n3 = 3 * n4; - - /* pre rotation */ - for(i=0;ifft, x); - - /* post rotation */ - for(i=0;itcos); - av_freep(&s->tsin); - ff_fft_end(&s->fft); -} -- cgit v1.2.3
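
As a quick sanity check of the new fixed-point helpers, the following standalone sketch (not part of the patch; the constants, the file name and the build line are illustrative) converts a few values to Q16.16 with ftofix32, runs them through fixmul16, fixdiv16 and fastSqrt, and prints the results with fixtof64; the expected output is 0.093750, 1.500000 and 1.500000.

    /* test_fixp.c: illustrative sketch, not part of the patch */
    #include <stdio.h>
    #include <stdint.h>
    #include "fixp_math.h"

    int main(void)
    {
        int32_t a = ftofix32(0.375f);             /* 0.375 as Q16.16 */
        int32_t b = ftofix32(0.25f);              /* 0.25  as Q16.16 */

        int32_t prod = fixmul16(a, b);            /* 0.375 * 0.25 = 0.09375 */
        int32_t quot = fixdiv16(a, b);            /* 0.375 / 0.25 = 1.5     */
        int32_t root = fastSqrt(ftofix32(2.25f)); /* sqrt(2.25)   = 1.5, still Q16.16 */

        printf("%f\n%f\n%f\n", fixtof64(prod), fixtof64(quot), fixtof64(root));
        return 0;
    }

Build it next to the library sources, for example "gcc test_fixp.c fixp_math.c"; on newer compilers the plain inline definitions in fixp_math.c may additionally need -fgnu89-inline (or -std=gnu89) so that out-of-line copies are emitted.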
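
The Q31 helper follows the same pattern: coefficients with magnitude below 1.0 are stored as Q31 fractions and applied to Q16.16 samples with fixmul31, which leaves the samples in Q16.16. The sketch below is illustrative only: apply_window_q31 is a hypothetical helper rather than a function from this patch, and the window values are made up; it merely shows the intended calling convention.

    /* illustrative sketch, not part of the patch */
    #include <stdio.h>
    #include <stdint.h>
    #include "fixp_math.h"

    /* Hypothetical helper: scale Q16.16 samples in place by Q31 coefficients. */
    static void apply_window_q31(int32_t *samples, const int32_t *win, int n)
    {
        int i;
        for (i = 0; i < n; i++)
            samples[i] = fixmul31(samples[i], win[i]); /* result stays Q16.16 */
    }

    int main(void)
    {
        int32_t samples[4] = { ftofix32(1.0f),   ftofix32(0.5f),
                               ftofix32(-0.25f), ftofix32(0.125f) };
        int32_t win[4]     = { ftofix31(0.25f),  ftofix31(0.5f),
                               ftofix31(0.75f),  ftofix31(0.99f) }; /* |coef| < 1 */
        int i;

        apply_window_q31(samples, win, 4);
        for (i = 0; i < 4; i++)
            printf("%f\n", fixtof64(samples[i])); /* 0.25, 0.25, -0.1875, ~0.124 */
        return 0;
    }

The build line from the previous sketch applies here as well.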