Import libatrac from ffmpeg and modify librm to support ATRAC3.

The decoder is still in floating point. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@22235 a1c6a512-1295-4272-9138-f99709370657
author: Mohamed Tarek <mt@rockbox.org> 2009-08-10 14:46:31 +0000
committer: Mohamed Tarek <mt@rockbox.org> 2009-08-10 14:46:31 +0000
commit: 519adfbaae5933ec18d2e3cb77e690e36cd49916 (patch)
tree: 8303456ddf72f2ff5da2b9a99802c73e1409fc28 /apps/codecs
parent: 1c0aeb18ca264e1f64e31f219189afab442676eb (diff)
download: rockbox-519adfbaae5933ec18d2e3cb77e690e36cd49916.tar.gz
rockbox-519adfbaae5933ec18d2e3cb77e690e36cd49916.zip
25 files changed, 13440 insertions, 9 deletions
diff --git a/apps/codecs/libatrac/Makefile.test b/apps/codecs/libatrac/Makefile.test
new file mode 100644
index 0000000000..56b50b90fa
--- /dev/null
+++ b/apps/codecs/libatrac/Makefile.test
@@ -0,0 +1,11 @@
+CFLAGS = -Wall -O3 -DTEST -D"DEBUGF=printf"
+OBJS = atrac3.o dsputil.o bitstream.o fft.o mdct.o libavutil/log.o libavutil/mem.o ../librm/rm.o
+atractest: $(OBJS)
+        gcc -o atractest $(OBJS) -lm
+.c.o :
+        $(CC) $(CFLAGS) -c -o $@ $<
+clean:
+        rm -f atractest $(OBJS) *~ output.wav
diff --git a/apps/codecs/libatrac/README.rockbox b/apps/codecs/libatrac/README.rockbox
new file mode 100644
index 0000000000..7f62e10bb4
--- /dev/null
+++ b/apps/codecs/libatrac/README.rockbox
@@ -0,0 +1,20 @@
+Library: libatrac
+Imported by : Mohamed Tarek
+Import date : 10-August-2009
+LICENSING INFORMATION
+ffmpeg is licensed under the Lesser GNU General Public License.
+IMPORT DETAILS
+The decoder is based on ffmpeg-svn r18079. It still uses floating 
+point math and not suitable to be used in rockbox.
+TESTING
+The test program should compile in any Unix-like environment using the
+command "make -f Makefile.test".
+Running "./atractest file.rm" will decode the audio data to a WAV file
+called "output.wav" in the current directory.
diff --git a/apps/codecs/libatrac/atrac3.c b/apps/codecs/libatrac/atrac3.c
new file mode 100644
index 0000000000..a800511397
--- /dev/null
+++ b/apps/codecs/libatrac/atrac3.c
@@ -0,0 +1,1249 @@
+/*
+ * Atrac 3 compatible decoder
+ * Copyright (c) 2006-2008 Maxim Poliakovski
+ * Copyright (c) 2006-2008 Benjamin Larsson
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/**
+ * @file libavcodec/atrac3.c
+ * Atrac 3 compatible decoder.
+ * This decoder handles Sony's ATRAC3 data.
+ *
+ * Container formats used to store atrac 3 data:
+ * RealMedia (.rm), RIFF WAV (.wav, .at3), Sony OpenMG (.oma, .aa3).
+ *
+ * To use this decoder, a calling application must supply the extradata
+ * bytes provided in the containers above.
+ */
+#include <math.h>
+#include <stddef.h>
+#include <stdio.h>
+#include "avcodec.h"
+#include "bitstream.h"
+#include "dsputil.h"
+#include "bytestream.h"
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+#include "../librm/rm.h"
+#include "atrac3data.h"
+#define JOINT_STEREO    0x12
+#define STEREO          0x2
+#define AVERROR(...) -1
+/* These structures are needed to store the parsed gain control data. */
+typedef struct {
+    int   num_gain_data;
+    int   levcode[8];
+    int   loccode[8];
+} gain_info;
+typedef struct {
+    gain_info   gBlock[4];
+} gain_block;
+typedef struct {
+    int     pos;
+    int     numCoefs;
+    float   coef[8];
+} tonal_component;
+typedef struct {
+    int               bandsCoded;
+    int               numComponents;
+    tonal_component   components[64];
+    float             prevFrame[1024];
+    int               gcBlkSwitch;
+    gain_block        gainBlock[2];
+    DECLARE_ALIGNED_16(float, spectrum[1024]);
+    DECLARE_ALIGNED_16(float, IMDCT_buf[1024]);
+    float             delayBuf1[46]; ///<qmf delay buffers
+    float             delayBuf2[46];
+    float             delayBuf3[46];
+} channel_unit;
+typedef struct {
+    GetBitContext       gb;
+    //@{
+    /** stream data */
+    int                 channels;
+    int                 codingMode;
+    int                 bit_rate;
+    int                 sample_rate;
+    int                 samples_per_channel;
+    int                 samples_per_frame;
+    int                 bits_per_frame;
+    int                 bytes_per_frame;
+    int                 pBs;
+    channel_unit*       pUnits;
+    //@}
+    //@{
+    /** joint-stereo related variables */
+    int                 matrix_coeff_index_prev[4];
+    int                 matrix_coeff_index_now[4];
+    int                 matrix_coeff_index_next[4];
+    int                 weighting_delay[6];
+    //@}
+    //@{
+    /** data buffers */
+    float               outSamples[2048];
+    uint8_t*            decoded_bytes_buffer;
+    float               tempBuf[1070];
+    //@}
+    //@{
+    /** extradata */
+    int                 atrac3version;
+    int                 delay;
+    int                 scrambled_stream;
+    int                 frame_factor;
+    //@}
+} ATRAC3Context;
+static DECLARE_ALIGNED_16(float,mdct_window[512]);
+static float            qmf_window[48];
+static VLC              spectral_coeff_tab[7];
+static float            SFTable[64];
+static float            gain_tab1[16];
+static float            gain_tab2[31];
+static MDCTContext      mdct_ctx;
+static DSPContext       dsp;
+/* quadrature mirror synthesis filter */
+/**
+ * Quadrature mirror synthesis filter.
+ *
+ * @param inlo      lower part of spectrum
+ * @param inhi      higher part of spectrum
+ * @param nIn       size of spectrum buffer
+ * @param pOut      out buffer
+ * @param delayBuf  delayBuf buffer
+ * @param temp      temp buffer
+ */
+static void iqmf (float *inlo, float *inhi, unsigned int nIn, float *pOut, float *delayBuf, float *temp)
+{
+    int   i, j;
+    float   *p1, *p3;
+    memcpy(temp, delayBuf, 46*sizeof(float));
+    p3 = temp + 46;
+    /* loop1 */
+    for(i=0; i<nIn; i+=2){
+        p3[2*i+0] = inlo[i  ] + inhi[i  ];
+        p3[2*i+1] = inlo[i  ] - inhi[i  ];
+        p3[2*i+2] = inlo[i+1] + inhi[i+1];
+        p3[2*i+3] = inlo[i+1] - inhi[i+1];
+    }
+    /* loop2 */
+    p1 = temp;
+    for (j = nIn; j != 0; j--) {
+        float s1 = 0.0;
+        float s2 = 0.0;
+        for (i = 0; i < 48; i += 2) {
+            s1 += p1[i] * qmf_window[i];
+            s2 += p1[i+1] * qmf_window[i+1];
+        }
+        pOut[0] = s2;
+        pOut[1] = s1;
+        p1 += 2;
+        pOut += 2;
+    }
+    /* Update the delay buffer. */
+    memcpy(delayBuf, temp + nIn*2, 46*sizeof(float));
+}
+/**
+ * Regular 512 points IMDCT without overlapping, with the exception of the swapping of odd bands
+ * caused by the reverse spectra of the QMF.
+ *
+ * @param pInput    float input
+ * @param pOutput   float output
+ * @param odd_band  1 if the band is an odd band
+ */
+static void IMLT(float *pInput, float *pOutput, int odd_band)
+{
+    int     i;
+    if (odd_band) {
+        /**
+        * Reverse the odd bands before IMDCT, this is an effect of the QMF transform
+        * or it gives better compression to do it this way.
+        * FIXME: It should be possible to handle this in ff_imdct_calc
+        * for that to happen a modification of the prerotation step of
+        * all SIMD code and C code is needed.
+        * Or fix the functions before so they generate a pre reversed spectrum.
+        */
+        for (i=0; i<128; i++)
+            FFSWAP(float, pInput[i], pInput[255-i]);
+    }
+    ff_imdct_calc(&mdct_ctx,pOutput,pInput);
+    /* Perform windowing on the output. */
+    dsp.vector_fmul(pOutput,mdct_window,512);
+}
+/**
+ * Atrac 3 indata descrambling, only used for data coming from the rm container
+ *
+ * @param in        pointer to 8 bit array of indata
+ * @param bits      amount of bits
+ * @param out       pointer to 8 bit array of outdata
+ */
+static int decode_bytes(const uint8_t* inbuffer, uint8_t* out, int bytes){
+    int i, off;
+    uint32_t c;
+    const uint32_t* buf;
+    uint32_t* obuf = (uint32_t*) out;
+#ifdef TEST
+    off = 0; //no check for memory alignment of inbuffer
+#else
+    off = (intptr_t)inbuffer & 3;
+#endif /* TEST */
+    buf = (const uint32_t*) (inbuffer - off);
+    c = be2me_32((0x537F6103 >> (off*8)) | (0x537F6103 << (32-(off*8))));
+    bytes += 3 + off;
+    for (i = 0; i < bytes/4; i++)
+        obuf[i] = c ^ buf[i];
+    if (off)
+        av_log(NULL,AV_LOG_DEBUG,"Offset of %d not handled, post sample on ffmpeg-dev.\n",off);
+    return off;
+}
+static av_cold void init_atrac3_transforms(ATRAC3Context *q) {
+    float enc_window[256];
+    float s;
+    int i;
+    /* Generate the mdct window, for details see
+     * http://wiki.multimedia.cx/index.php?title=RealAudio_atrc#Windows */
+    for (i=0 ; i<256; i++)
+        enc_window[i] = (sin(((i + 0.5) / 256.0 - 0.5) * M_PI) + 1.0) * 0.5;
+    if (!mdct_window[0])
+        for (i=0 ; i<256; i++) {
+            mdct_window[i] = enc_window[i]/(enc_window[i]*enc_window[i] + enc_window[255-i]*enc_window[255-i]);
+            mdct_window[511-i] = mdct_window[i];
+        }
+    /* Generate the QMF window. */
+    for (i=0 ; i<24; i++) {
+        s = qmf_48tap_half[i] * 2.0;
+        qmf_window[i] = s;
+        qmf_window[47 - i] = s;
+    }
+    /* Initialize the MDCT transform. */
+    ff_mdct_init(&mdct_ctx, 9, 1);
+}
+/**
+ * Atrac3 uninit, free all allocated memory
+ */
+static av_cold int atrac3_decode_close(ATRAC3Context *q)
+{
+    //ATRAC3Context *q = rmctx->priv_data;
+    av_free(q->pUnits);
+    av_free(q->decoded_bytes_buffer);
+    return 0;
+}
+/**
+/ * Mantissa decoding
+ *
+ * @param gb            the GetBit context
+ * @param selector      what table is the output values coded with
+ * @param codingFlag    constant length coding or variable length coding
+ * @param mantissas     mantissa output table
+ * @param numCodes      amount of values to get
+ */
+static void readQuantSpectralCoeffs (GetBitContext *gb, int selector, int codingFlag, int* mantissas, int numCodes)
+{
+    int   numBits, cnt, code, huffSymb;
+    if (selector == 1)
+        numCodes /= 2;
+    if (codingFlag != 0) {
+        /* constant length coding (CLC) */
+        numBits = CLCLengthTab[selector];
+        if (selector > 1) {
+            for (cnt = 0; cnt < numCodes; cnt++) {
+                if (numBits)
+                    code = get_sbits(gb, numBits);
+                else
+                    code = 0;
+                mantissas[cnt] = code;
+            }
+        } else {
+            for (cnt = 0; cnt < numCodes; cnt++) {
+                if (numBits)
+                    code = get_bits(gb, numBits); //numBits is always 4 in this case
+                else
+                    code = 0;
+                mantissas[cnt*2] = seTab_0[code >> 2];
+                mantissas[cnt*2+1] = seTab_0[code & 3];
+            }
+        }
+    } else {
+        /* variable length coding (VLC) */
+        if (selector != 1) {
+            for (cnt = 0; cnt < numCodes; cnt++) {
+                huffSymb = get_vlc2(gb, spectral_coeff_tab[selector-1].table, spectral_coeff_tab[selector-1].bits, 3);
+                huffSymb += 1;
+                code = huffSymb >> 1;
+                if (huffSymb & 1)
+                    code = -code;
+                mantissas[cnt] = code;
+            }
+        } else {
+            for (cnt = 0; cnt < numCodes; cnt++) {
+                huffSymb = get_vlc2(gb, spectral_coeff_tab[selector-1].table, spectral_coeff_tab[selector-1].bits, 3);
+                mantissas[cnt*2] = decTable1[huffSymb*2];
+                mantissas[cnt*2+1] = decTable1[huffSymb*2+1];
+            }
+        }
+    }
+}
+/**
+ * Restore the quantized band spectrum coefficients
+ *
+ * @param gb            the GetBit context
+ * @param pOut          decoded band spectrum
+ * @return outSubbands   subband counter, fix for broken specification/files
+ */
+static int decodeSpectrum (GetBitContext *gb, float *pOut)
+{
+    int   numSubbands, codingMode, cnt, first, last, subbWidth, *pIn;
+    int   subband_vlc_index[32], SF_idxs[32];
+    int   mantissas[128];
+    float SF;
+    numSubbands = get_bits(gb, 5); // number of coded subbands
+    codingMode = get_bits1(gb); // coding Mode: 0 - VLC/ 1-CLC
+    /* Get the VLC selector table for the subbands, 0 means not coded. */
+    for (cnt = 0; cnt <= numSubbands; cnt++)
+        subband_vlc_index[cnt] = get_bits(gb, 3);
+    /* Read the scale factor indexes from the stream. */
+    for (cnt = 0; cnt <= numSubbands; cnt++) {
+        if (subband_vlc_index[cnt] != 0)
+            SF_idxs[cnt] = get_bits(gb, 6);
+    }
+    for (cnt = 0; cnt <= numSubbands; cnt++) {
+        first = subbandTab[cnt];
+        last = subbandTab[cnt+1];
+        subbWidth = last - first;
+        if (subband_vlc_index[cnt] != 0) {
+            /* Decode spectral coefficients for this subband. */
+            /* TODO: This can be done faster is several blocks share the
+             * same VLC selector (subband_vlc_index) */
+            readQuantSpectralCoeffs (gb, subband_vlc_index[cnt], codingMode, mantissas, subbWidth);
+            /* Decode the scale factor for this subband. */
+            SF = SFTable[SF_idxs[cnt]] * iMaxQuant[subband_vlc_index[cnt]];
+            /* Inverse quantize the coefficients. */
+            for (pIn=mantissas ; first<last; first++, pIn++)
+                pOut[first] = *pIn * SF;
+        } else {
+            /* This subband was not coded, so zero the entire subband. */
+            memset(pOut+first, 0, subbWidth*sizeof(float));
+        }
+    }
+    /* Clear the subbands that were not coded. */
+    first = subbandTab[cnt];
+    memset(pOut+first, 0, (1024 - first) * sizeof(float));
+    return numSubbands;
+}
+/**
+ * Restore the quantized tonal components
+ *
+ * @param gb            the GetBit context
+ * @param pComponent    tone component
+ * @param numBands      amount of coded bands
+ */
+static int decodeTonalComponents (GetBitContext *gb, tonal_component *pComponent, int numBands)
+{
+    int i,j,k,cnt;
+    int   components, coding_mode_selector, coding_mode, coded_values_per_component;
+    int   sfIndx, coded_values, max_coded_values, quant_step_index, coded_components;
+    int   band_flags[4], mantissa[8];
+    float  *pCoef;
+    float  scalefactor;
+    int   component_count = 0;
+    components = get_bits(gb,5);
+    /* no tonal components */
+    if (components == 0)
+        return 0;
+    coding_mode_selector = get_bits(gb,2);
+    if (coding_mode_selector == 2)
+        return -1;
+    coding_mode = coding_mode_selector & 1;
+    for (i = 0; i < components; i++) {
+        for (cnt = 0; cnt <= numBands; cnt++)
+            band_flags[cnt] = get_bits1(gb);
+        coded_values_per_component = get_bits(gb,3);
+        quant_step_index = get_bits(gb,3);
+        if (quant_step_index <= 1)
+            return -1;
+        if (coding_mode_selector == 3)
+            coding_mode = get_bits1(gb);
+        for (j = 0; j < (numBands + 1) * 4; j++) {
+            if (band_flags[j >> 2] == 0)
+                continue;
+            coded_components = get_bits(gb,3);
+            for (k=0; k<coded_components; k++) {
+                sfIndx = get_bits(gb,6);
+                pComponent[component_count].pos = j * 64 + (get_bits(gb,6));
+                max_coded_values = 1024 - pComponent[component_count].pos;
+                coded_values = coded_values_per_component + 1;
+                coded_values = FFMIN(max_coded_values,coded_values);
+                scalefactor = SFTable[sfIndx] * iMaxQuant[quant_step_index];
+                readQuantSpectralCoeffs(gb, quant_step_index, coding_mode, mantissa, coded_values);
+                pComponent[component_count].numCoefs = coded_values;
+                /* inverse quant */
+                pCoef = pComponent[component_count].coef;
+                for (cnt = 0; cnt < coded_values; cnt++)
+                    pCoef[cnt] = mantissa[cnt] * scalefactor;
+                component_count++;
+            }
+        }
+    }
+    return component_count;
+}
+/**
+ * Decode gain parameters for the coded bands
+ *
+ * @param gb            the GetBit context
+ * @param pGb           the gainblock for the current band
+ * @param numBands      amount of coded bands
+ */
+static int decodeGainControl (GetBitContext *gb, gain_block *pGb, int numBands)
+{
+    int   i, cf, numData;
+    int   *pLevel, *pLoc;
+    gain_info   *pGain = pGb->gBlock;
+    for (i=0 ; i<=numBands; i++)
+    {
+        numData = get_bits(gb,3);
+        pGain[i].num_gain_data = numData;
+        pLevel = pGain[i].levcode;
+        pLoc = pGain[i].loccode;
+        for (cf = 0; cf < numData; cf++){
+            pLevel[cf]= get_bits(gb,4);
+            pLoc  [cf]= get_bits(gb,5);
+            if(cf && pLoc[cf] <= pLoc[cf-1])
+                return -1;
+        }
+    }
+    /* Clear the unused blocks. */
+    for (; i<4 ; i++)
+        pGain[i].num_gain_data = 0;
+    return 0;
+}
+/**
+ * Apply gain parameters and perform the MDCT overlapping part
+ *
+ * @param pIn           input float buffer
+ * @param pPrev         previous float buffer to perform overlap against
+ * @param pOut          output float buffer
+ * @param pGain1        current band gain info
+ * @param pGain2        next band gain info
+ */
+static void gainCompensateAndOverlap (float *pIn, float *pPrev, float *pOut, gain_info *pGain1, gain_info *pGain2)
+{
+    /* gain compensation function */
+    float  gain1, gain2, gain_inc;
+    int   cnt, numdata, nsample, startLoc, endLoc;
+    if (pGain2->num_gain_data == 0)
+        gain1 = 1.0;
+    else
+        gain1 = gain_tab1[pGain2->levcode[0]];
+    if (pGain1->num_gain_data == 0) {
+        for (cnt = 0; cnt < 256; cnt++)
+            pOut[cnt] = pIn[cnt] * gain1 + pPrev[cnt];
+    } else {
+        numdata = pGain1->num_gain_data;
+        pGain1->loccode[numdata] = 32;
+        pGain1->levcode[numdata] = 4;
+        nsample = 0; // current sample = 0
+        for (cnt = 0; cnt < numdata; cnt++) {
+            startLoc = pGain1->loccode[cnt] * 8;
+            endLoc = startLoc + 8;
+            gain2 = gain_tab1[pGain1->levcode[cnt]];
+            gain_inc = gain_tab2[(pGain1->levcode[cnt+1] - pGain1->levcode[cnt])+15];
+            /* interpolate */
+            for (; nsample < startLoc; nsample++)
+                pOut[nsample] = (pIn[nsample] * gain1 + pPrev[nsample]) * gain2;
+            /* interpolation is done over eight samples */
+            for (; nsample < endLoc; nsample++) {
+                pOut[nsample] = (pIn[nsample] * gain1 + pPrev[nsample]) * gain2;
+                gain2 *= gain_inc;
+            }
+        }
+        for (; nsample < 256; nsample++)
+            pOut[nsample] = (pIn[nsample] * gain1) + pPrev[nsample];
+    }
+    /* Delay for the overlapping part. */
+    memcpy(pPrev, &pIn[256], 256*sizeof(float));
+}
+/**
+ * Combine the tonal band spectrum and regular band spectrum
+ * Return position of the last tonal coefficient
+ *
+ * @param pSpectrum     output spectrum buffer
+ * @param numComponents amount of tonal components
+ * @param pComponent    tonal components for this band
+ */
+static int addTonalComponents (float *pSpectrum, int numComponents, tonal_component *pComponent)
+{
+    int   cnt, i, lastPos = -1;
+    float   *pIn, *pOut;
+    for (cnt = 0; cnt < numComponents; cnt++){
+        lastPos = FFMAX(pComponent[cnt].pos + pComponent[cnt].numCoefs, lastPos);
+        pIn = pComponent[cnt].coef;
+        pOut = &(pSpectrum[pComponent[cnt].pos]);
+        for (i=0 ; i<pComponent[cnt].numCoefs ; i++)
+            pOut[i] += pIn[i];
+    }
+    return lastPos;
+}
+#define INTERPOLATE(old,new,nsample) ((old) + (nsample)*0.125*((new)-(old)))
+static void reverseMatrixing(float *su1, float *su2, int *pPrevCode, int *pCurrCode)
+{
+    int    i, band, nsample, s1, s2;
+    float    c1, c2;
+    float    mc1_l, mc1_r, mc2_l, mc2_r;
+    for (i=0,band = 0; band < 4*256; band+=256,i++) {
+        s1 = pPrevCode[i];
+        s2 = pCurrCode[i];
+        nsample = 0;
+        if (s1 != s2) {
+            /* Selector value changed, interpolation needed. */
+            mc1_l = matrixCoeffs[s1*2];
+            mc1_r = matrixCoeffs[s1*2+1];
+            mc2_l = matrixCoeffs[s2*2];
+            mc2_r = matrixCoeffs[s2*2+1];
+            /* Interpolation is done over the first eight samples. */
+            for(; nsample < 8; nsample++) {
+                c1 = su1[band+nsample];
+                c2 = su2[band+nsample];
+                c2 = c1 * INTERPOLATE(mc1_l,mc2_l,nsample) + c2 * INTERPOLATE(mc1_r,mc2_r,nsample);
+                su1[band+nsample] = c2;
+                su2[band+nsample] = c1 * 2.0 - c2;
+            }
+        }
+        /* Apply the matrix without interpolation. */
+        switch (s2) {
+            case 0:     /* M/S decoding */
+                for (; nsample < 256; nsample++) {
+                    c1 = su1[band+nsample];
+                    c2 = su2[band+nsample];
+                    su1[band+nsample] = c2 * 2.0;
+                    su2[band+nsample] = (c1 - c2) * 2.0;
+                }
+                break;
+            case 1:
+                for (; nsample < 256; nsample++) {
+                    c1 = su1[band+nsample];
+                    c2 = su2[band+nsample];
+                    su1[band+nsample] = (c1 + c2) * 2.0;
+                    su2[band+nsample] = c2 * -2.0;
+                }
+                break;
+            case 2:
+            case 3:
+                for (; nsample < 256; nsample++) {
+                    c1 = su1[band+nsample];
+                    c2 = su2[band+nsample];
+                    su1[band+nsample] = c1 + c2;
+                    su2[band+nsample] = c1 - c2;
+                }
+                break;
+            default:
+                assert(0);
+        }
+    }
+}
+static void getChannelWeights (int indx, int flag, float ch[2]){
+    if (indx == 7) {
+        ch[0] = 1.0;
+        ch[1] = 1.0;
+    } else {
+        ch[0] = (float)(indx & 7) / 7.0;
+        ch[1] = sqrt(2 - ch[0]*ch[0]);
+        if(flag)
+            FFSWAP(float, ch[0], ch[1]);
+    }
+}
+static void channelWeighting (float *su1, float *su2, int *p3)
+{
+    int   band, nsample;
+    /* w[x][y] y=0 is left y=1 is right */
+    float w[2][2];
+    if (p3[1] != 7 || p3[3] != 7){
+        getChannelWeights(p3[1], p3[0], w[0]);
+        getChannelWeights(p3[3], p3[2], w[1]);
+        for(band = 1; band < 4; band++) {
+            /* scale the channels by the weights */
+            for(nsample = 0; nsample < 8; nsample++) {
+                su1[band*256+nsample] *= INTERPOLATE(w[0][0], w[0][1], nsample);
+                su2[band*256+nsample] *= INTERPOLATE(w[1][0], w[1][1], nsample);
+            }
+            for(; nsample < 256; nsample++) {
+                su1[band*256+nsample] *= w[1][0];
+                su2[band*256+nsample] *= w[1][1];
+            }
+        }
+    }
+}
+/**
+ * Decode a Sound Unit
+ *
+ * @param gb            the GetBit context
+ * @param pSnd          the channel unit to be used
+ * @param pOut          the decoded samples before IQMF in float representation
+ * @param channelNum    channel number
+ * @param codingMode    the coding mode (JOINT_STEREO or regular stereo/mono)
+ */
+static int decodeChannelSoundUnit (ATRAC3Context *q, GetBitContext *gb, channel_unit *pSnd, float *pOut, int channelNum, int codingMode)
+{
+    int   band, result=0, numSubbands, lastTonal, numBands;
+    if (codingMode == JOINT_STEREO && channelNum == 1) {
+        if (get_bits(gb,2) != 3) {
+            av_log(NULL,AV_LOG_ERROR,"JS mono Sound Unit id != 3.\n");
+            return -1;
+        }
+    } else {
+        if (get_bits(gb,6) != 0x28) {
+            av_log(NULL,AV_LOG_ERROR,"Sound Unit id != 0x28.\n");
+            return -1;
+        }
+    }
+    /* number of coded QMF bands */
+    pSnd->bandsCoded = get_bits(gb,2);
+    result = decodeGainControl (gb, &(pSnd->gainBlock[pSnd->gcBlkSwitch]), pSnd->bandsCoded);
+    if (result) return result;
+    pSnd->numComponents = decodeTonalComponents (gb, pSnd->components, pSnd->bandsCoded);
+    if (pSnd->numComponents == -1) return -1;
+    numSubbands = decodeSpectrum (gb, pSnd->spectrum);
+    /* Merge the decoded spectrum and tonal components. */
+    lastTonal = addTonalComponents (pSnd->spectrum, pSnd->numComponents, pSnd->components);
+    /* calculate number of used MLT/QMF bands according to the amount of coded spectral lines */
+    numBands = (subbandTab[numSubbands] - 1) >> 8;
+    if (lastTonal >= 0)
+        numBands = FFMAX((lastTonal + 256) >> 8, numBands);
+    /* Reconstruct time domain samples. */
+    for (band=0; band<4; band++) {
+        /* Perform the IMDCT step without overlapping. */
+        if (band <= numBands) {
+            IMLT(&(pSnd->spectrum[band*256]), pSnd->IMDCT_buf, band&1);
+        } else
+            memset(pSnd->IMDCT_buf, 0, 512 * sizeof(float));
+        /* gain compensation and overlapping */
+        gainCompensateAndOverlap (pSnd->IMDCT_buf, &(pSnd->prevFrame[band*256]), &(pOut[band*256]),
+                                    &((pSnd->gainBlock[1 - (pSnd->gcBlkSwitch)]).gBlock[band]),
+                                    &((pSnd->gainBlock[pSnd->gcBlkSwitch]).gBlock[band]));
+    }
+    /* Swap the gain control buffers for the next frame. */
+    pSnd->gcBlkSwitch ^= 1;
+    return 0;
+}
+/**
+ * Frame handling
+ *
+ * @param q             Atrac3 private context
+ * @param databuf       the input data
+ */
+static int decodeFrame(ATRAC3Context *q, const uint8_t* databuf)
+{
+    int   result, i;
+    float   *p1, *p2, *p3, *p4;
+    uint8_t *ptr1;
+    if (q->codingMode == JOINT_STEREO) {
+        /* channel coupling mode */
+        /* decode Sound Unit 1 */
+        init_get_bits(&q->gb,databuf,q->bits_per_frame);
+        result = decodeChannelSoundUnit(q,&q->gb, q->pUnits, q->outSamples, 0, JOINT_STEREO);
+        if (result != 0)
+            return (result);
+        /* Framedata of the su2 in the joint-stereo mode is encoded in
+         * reverse byte order so we need to swap it first. */
+        if (databuf == q->decoded_bytes_buffer) {
+            uint8_t *ptr2 = q->decoded_bytes_buffer+q->bytes_per_frame-1;
+            ptr1 = q->decoded_bytes_buffer;
+            for (i = 0; i < (q->bytes_per_frame/2); i++, ptr1++, ptr2--) {
+                FFSWAP(uint8_t,*ptr1,*ptr2);
+            }
+        } else {
+            const uint8_t *ptr2 = databuf+q->bytes_per_frame-1;
+            for (i = 0; i < q->bytes_per_frame; i++)
+                q->decoded_bytes_buffer[i] = *ptr2--;
+        }
+        /* Skip the sync codes (0xF8). */
+        ptr1 = q->decoded_bytes_buffer;
+        for (i = 4; *ptr1 == 0xF8; i++, ptr1++) {
+            if (i >= q->bytes_per_frame)
+                return -1;
+        }
+        /* set the bitstream reader at the start of the second Sound Unit*/
+        init_get_bits(&q->gb,ptr1,q->bits_per_frame);
+        /* Fill the Weighting coeffs delay buffer */
+        memmove(q->weighting_delay,&(q->weighting_delay[2]),4*sizeof(int));
+        q->weighting_delay[4] = get_bits1(&q->gb);
+        q->weighting_delay[5] = get_bits(&q->gb,3);
+        for (i = 0; i < 4; i++) {
+            q->matrix_coeff_index_prev[i] = q->matrix_coeff_index_now[i];
+            q->matrix_coeff_index_now[i] = q->matrix_coeff_index_next[i];
+            q->matrix_coeff_index_next[i] = get_bits(&q->gb,2);
+        }
+        /* Decode Sound Unit 2. */
+        result = decodeChannelSoundUnit(q,&q->gb, &q->pUnits[1], &q->outSamples[1024], 1, JOINT_STEREO);
+        if (result != 0)
+            return (result);
+        /* Reconstruct the channel coefficients. */
+        reverseMatrixing(q->outSamples, &q->outSamples[1024], q->matrix_coeff_index_prev, q->matrix_coeff_index_now);
+        channelWeighting(q->outSamples, &q->outSamples[1024], q->weighting_delay);
+    } else {
+        /* normal stereo mode or mono */
+        /* Decode the channel sound units. */
+        for (i=0 ; i<q->channels ; i++) {
+            /* Set the bitstream reader at the start of a channel sound unit. */
+            init_get_bits(&q->gb, databuf+((i*q->bytes_per_frame)/q->channels), (q->bits_per_frame)/q->channels);
+            result = decodeChannelSoundUnit(q,&q->gb, &q->pUnits[i], &q->outSamples[i*1024], i, q->codingMode);
+            if (result != 0)
+                return (result);
+        }
+    }
+    /* Apply the iQMF synthesis filter. */
+    p1= q->outSamples;
+    for (i=0 ; i<q->channels ; i++) {
+        p2= p1+256;
+        p3= p2+256;
+        p4= p3+256;
+        iqmf (p1, p2, 256, p1, q->pUnits[i].delayBuf1, q->tempBuf);
+        iqmf (p4, p3, 256, p3, q->pUnits[i].delayBuf2, q->tempBuf);
+        iqmf (p1, p3, 512, p1, q->pUnits[i].delayBuf3, q->tempBuf);
+        p1 +=1024;
+    }
+    return 0;
+}
+/**
+ * Atrac frame decoding
+ *
+ * @param rmctx     pointer to the AVCodecContext
+ */
+static int atrac3_decode_frame(RMContext *rmctx, ATRAC3Context *q,
+            void *data, int *data_size,
+            const uint8_t *buf, int buf_size) {
+    //ATRAC3Context *q = rmctx->priv_data;
+    int result = 0, i;
+    const uint8_t* databuf;
+    int16_t* samples = data;
+    if (buf_size < rmctx->block_align)
+        return buf_size;
+    /* Check if we need to descramble and what buffer to pass on. */
+    if (q->scrambled_stream) {
+        decode_bytes(buf, q->decoded_bytes_buffer, rmctx->block_align);
+        databuf = q->decoded_bytes_buffer;
+    } else {
+        databuf = buf;
+    }
+    result = decodeFrame(q, databuf);
+    if (result != 0) {
+        av_log(NULL,AV_LOG_ERROR,"Frame decoding error!\n");
+        return -1;
+    }
+    if (q->channels == 1) {
+        /* mono */
+        for (i = 0; i<1024; i++)
+            samples[i] = av_clip_int16(round(q->outSamples[i]));
+        *data_size = 1024 * sizeof(int16_t);
+    } else {
+        /* stereo */
+        for (i = 0; i < 1024; i++) {
+            samples[i*2] = av_clip_int16(round(q->outSamples[i]));
+            samples[i*2+1] = av_clip_int16(round(q->outSamples[1024+i]));
+        }
+        *data_size = 2048 * sizeof(int16_t);
+    }
+    return rmctx->block_align;
+}
+/**
+ * Atrac3 initialization
+ *
+ * @param rmctx     pointer to the RMContext
+ */
+static av_cold int atrac3_decode_init(ATRAC3Context *q, RMContext *rmctx)
+{
+    int i;
+    const uint8_t *edata_ptr = rmctx->codec_extradata;
+    //ATRAC3Context *q = rmctx->priv_data;
+    static VLC_TYPE atrac3_vlc_table[4096][2];
+    static int vlcs_initialized = 0;
+    /* Take data from the AVCodecContext (RM container). */
+    q->sample_rate = rmctx->sample_rate;
+    q->channels = rmctx->nb_channels;
+    q->bit_rate = rmctx->bit_rate;
+    q->bits_per_frame = rmctx->block_align * 8;
+    q->bytes_per_frame = rmctx->block_align;
+    /* Take care of the codec-specific extradata. */
+    if (rmctx->extradata_size == 14) {
+        /* Parse the extradata, WAV format */
+        av_log(rmctx,AV_LOG_DEBUG,"[0-1] %d\n",bytestream_get_le16(&edata_ptr));  //Unknown value always 1
+        q->samples_per_channel = bytestream_get_le32(&edata_ptr);
+        q->codingMode = bytestream_get_le16(&edata_ptr);
+        av_log(rmctx,AV_LOG_DEBUG,"[8-9] %d\n",bytestream_get_le16(&edata_ptr));  //Dupe of coding mode
+        q->frame_factor = bytestream_get_le16(&edata_ptr);  //Unknown always 1
+        av_log(rmctx,AV_LOG_DEBUG,"[12-13] %d\n",bytestream_get_le16(&edata_ptr));  //Unknown always 0
+        /* setup */
+        q->samples_per_frame = 1024 * q->channels;
+        q->atrac3version = 4;
+        q->delay = 0x88E;
+        if (q->codingMode)
+            q->codingMode = JOINT_STEREO;
+        else
+            q->codingMode = STEREO;
+        q->scrambled_stream = 0;
+        if ((q->bytes_per_frame == 96*q->channels*q->frame_factor) || (q->bytes_per_frame == 152*q->channels*q->frame_factor) || (q->bytes_per_frame == 192*q->channels*q->frame_factor)) {
+        } else {
+            av_log(rmctx,AV_LOG_ERROR,"Unknown frame/channel/frame_factor configuration %d/%d/%d\n", q->bytes_per_frame, q->channels, q->frame_factor);
+            return -1;
+        }
+    } else if (rmctx->extradata_size == 10) {
+        /* Parse the extradata, RM format. */
+        q->atrac3version = bytestream_get_be32(&edata_ptr);
+        q->samples_per_frame = bytestream_get_be16(&edata_ptr);
+        q->delay = bytestream_get_be16(&edata_ptr);
+        q->codingMode = bytestream_get_be16(&edata_ptr);
+        q->samples_per_channel = q->samples_per_frame / q->channels;
+        q->scrambled_stream = 1;
+    } else {
+        av_log(NULL,AV_LOG_ERROR,"Unknown extradata size %d.\n",rmctx->extradata_size);
+    }
+    /* Check the extradata. */
+    if (q->atrac3version != 4) {
+        av_log(rmctx,AV_LOG_ERROR,"Version %d != 4.\n",q->atrac3version);
+        return -1;
+    }
+    if (q->samples_per_frame != 1024 && q->samples_per_frame != 2048) {
+        av_log(rmctx,AV_LOG_ERROR,"Unknown amount of samples per frame %d.\n",q->samples_per_frame);
+        return -1;
+    }
+    if (q->delay != 0x88E) {
+        av_log(rmctx,AV_LOG_ERROR,"Unknown amount of delay %x != 0x88E.\n",q->delay);
+        return -1;
+    }
+    if (q->codingMode == STEREO) {
+        av_log(rmctx,AV_LOG_DEBUG,"Normal stereo detected.\n");
+    } else if (q->codingMode == JOINT_STEREO) {
+        av_log(rmctx,AV_LOG_DEBUG,"Joint stereo detected.\n");
+    } else {
+        av_log(rmctx,AV_LOG_ERROR,"Unknown channel coding mode %x!\n",q->codingMode);
+        return -1;
+    }
+    if (rmctx->nb_channels <= 0 || rmctx->nb_channels > 2 /*|| ((rmctx->channels * 1024) != q->samples_per_frame)*/) {
+        av_log(rmctx,AV_LOG_ERROR,"Channel configuration error!\n");
+        return -1;
+    }
+    if(rmctx->block_align >= UINT16_MAX/2)
+        return -1;
+    /* Pad the data buffer with FF_INPUT_BUFFER_PADDING_SIZE,
+     * this is for the bitstream reader. */
+    if ((q->decoded_bytes_buffer = av_mallocz((rmctx->block_align+(4-rmctx->block_align%4) + FF_INPUT_BUFFER_PADDING_SIZE)))  == NULL)
+        return AVERROR(ENOMEM);
+    /* Initialize the VLC tables. */
+    if (!vlcs_initialized) {
+        for (i=0 ; i<7 ; i++) {
+            spectral_coeff_tab[i].table = &atrac3_vlc_table[atrac3_vlc_offs[i]];
+            spectral_coeff_tab[i].table_allocated = atrac3_vlc_offs[i + 1] - atrac3_vlc_offs[i];
+            init_vlc (&spectral_coeff_tab[i], 9, huff_tab_sizes[i],
+                huff_bits[i], 1, 1,
+                huff_codes[i], 1, 1, INIT_VLC_USE_NEW_STATIC);
+        }
+        vlcs_initialized = 1;
+    }
+    init_atrac3_transforms(q);
+    /* Generate the scale factors. */
+    for (i=0 ; i<64 ; i++)
+        SFTable[i] = pow(2.0, (i - 15) / 3.0);
+    /* Generate gain tables. */
+    for (i=0 ; i<16 ; i++)
+        gain_tab1[i] = powf (2.0, (4 - i));
+    for (i=-15 ; i<16 ; i++)
+        gain_tab2[i+15] = powf (2.0, i * -0.125);
+    /* init the joint-stereo decoding data */
+    q->weighting_delay[0] = 0;
+    q->weighting_delay[1] = 7;
+    q->weighting_delay[2] = 0;
+    q->weighting_delay[3] = 7;
+    q->weighting_delay[4] = 0;
+    q->weighting_delay[5] = 7;
+    for (i=0; i<4; i++) {
+        q->matrix_coeff_index_prev[i] = 3;
+        q->matrix_coeff_index_now[i] = 3;
+        q->matrix_coeff_index_next[i] = 3;
+    }
+    dsputil_init(&dsp);
+    q->pUnits = av_mallocz(sizeof(channel_unit)*q->channels);
+    if (!q->pUnits) {
+        av_free(q->decoded_bytes_buffer);
+        return AVERROR(ENOMEM);
+    }
+    
+    return 0;
+}
+/***************************************************************
+ * Following is a test program to convert from atrac/rm to wav *
+ ***************************************************************/
+static unsigned char wav_header[44]={
+    'R','I','F','F',//  0 - ChunkID
+    0,0,0,0,        //  4 - ChunkSize (filesize-8)
+    'W','A','V','E',//  8 - Format
+    'f','m','t',' ',// 12 - SubChunkID
+    16,0,0,0,       // 16 - SubChunk1ID  // 16 for PCM
+    1,0,            // 20 - AudioFormat (1=Uncompressed)
+    2,0,            // 22 - NumChannels
+    0,0,0,0,        // 24 - SampleRate in Hz
+    0,0,0,0,        // 28 - Byte Rate (SampleRate*NumChannels*(BitsPerSample/8)
+    4,0,            // 32 - BlockAlign (== NumChannels * BitsPerSample/8)
+    16,0,           // 34 - BitsPerSample
+    'd','a','t','a',// 36 - Subchunk2ID
+    0,0,0,0         // 40 - Subchunk2Size
+};
+int open_wav(char* filename) {
+    int fd,res;
+    fd=open(filename,O_CREAT|O_WRONLY|O_TRUNC,S_IRUSR|S_IWUSR);
+    if (fd >= 0) {
+        res = write(fd,wav_header,sizeof(wav_header));
+    }
+    return(fd);
+}
+void close_wav(int fd, RMContext *rmctx, ATRAC3Context *q) {
+    int x,res;
+    int filesize;
+    int bytes_per_sample = 2;
+    int samples_per_frame = q->samples_per_frame;
+    int nb_channels = rmctx->nb_channels;
+    int sample_rate = rmctx->sample_rate;
+    int nb_frames = rmctx->audio_framesize/rmctx->block_align * rmctx->nb_packets - 2; // first 2 frames have no valid audio; skipped in output
+    filesize= samples_per_frame*bytes_per_sample*nb_frames +44;
+    printf("Filesize = %d\n",filesize);
+    // ChunkSize
+    x=filesize-8;
+    wav_header[4]=(x&0xff);
+    wav_header[5]=(x&0xff00)>>8;
+    wav_header[6]=(x&0xff0000)>>16;
+    wav_header[7]=(x&0xff000000)>>24;
+    // Number of channels
+    wav_header[22]=nb_channels;
+    // Samplerate
+    wav_header[24]=sample_rate&0xff;
+    wav_header[25]=(sample_rate&0xff00)>>8;
+    wav_header[26]=(sample_rate&0xff0000)>>16;
+    wav_header[27]=(sample_rate&0xff000000)>>24;
+    // ByteRate
+    x=sample_rate*bytes_per_sample*nb_channels;
+    wav_header[28]=(x&0xff);
+    wav_header[29]=(x&0xff00)>>8;
+    wav_header[30]=(x&0xff0000)>>16;
+    wav_header[31]=(x&0xff000000)>>24;
+    // BlockAlign
+    wav_header[32]=rmctx->block_align;//2*rmctx->nb_channels;
+    // Bits per sample
+    wav_header[34]=16;
+    
+    // Subchunk2Size
+    x=filesize-44;
+    wav_header[40]=(x&0xff);
+    wav_header[41]=(x&0xff00)>>8;
+    wav_header[42]=(x&0xff0000)>>16;
+    wav_header[43]=(x&0xff000000)>>24;
+    lseek(fd,0,SEEK_SET);
+    res = write(fd,wav_header,sizeof(wav_header));
+    close(fd);
+}
+int main(int argc, char *argv[])
+{
+    int fd, fd_dec;
+    int res, i, datasize = 0;
+#ifdef DUMP_RAW_FRAMES 
+    char filename[15];
+    int fd_out;
+#endif
+    int16_t outbuf[2048];
+    uint16_t fs,sps,h;
+    uint32_t packet_count;
+    ATRAC3Context q;
+    RMContext rmctx;
+    RMPacket pkt;
+    memset(&q,0,sizeof(ATRAC3Context));
+    memset(&rmctx,0,sizeof(RMContext));
+    memset(&pkt,0,sizeof(RMPacket));
+    if (argc != 2) {
+        DEBUGF("Incorrect number of arguments\n");
+        return -1;
+    }
+    fd = open(argv[1],O_RDONLY);
+    if (fd < 0) {
+        DEBUGF("Error opening file %s\n", argv[1]);
+        return -1;
+    }
+    
+    /* copy the input rm file to a memory buffer */
+    uint8_t * filebuf = (uint8_t *)calloc((int)filesize(fd),sizeof(uint8_t));
+    res = read(fd,filebuf,filesize(fd)); 
+ 
+    fd_dec = open_wav("output.wav");
+    if (fd_dec < 0) {
+        DEBUGF("Error creating output file\n");
+        return -1;
+    }
+    res = real_parse_header(fd, &rmctx);
+    packet_count = rmctx.nb_packets;
+    rmctx.audio_framesize = rmctx.block_align;
+    rmctx.block_align = rmctx.sub_packet_size;
+    fs = rmctx.audio_framesize;
+    sps= rmctx.block_align;
+    h = rmctx.sub_packet_h;
+    atrac3_decode_init(&q,&rmctx);
+    
+    /* change the buffer pointer to point at the first audio frame */
+    advance_buffer(&filebuf, rmctx.data_offset + DATA_HEADER_SIZE);
+    while(packet_count)
+    {  
+        rm_get_packet(&filebuf, &rmctx, &pkt);
+        for(i = 0; i < rmctx.audio_pkt_cnt*(fs/sps) ; i++)
+        { 
+            /* output raw audio frames that are sent to the decoder into separate files */
+#ifdef DUMP_RAW_FRAMES 
+              snprintf(filename,sizeof(filename),"dump%d.raw",++x);
+              fd_out = open(filename,O_WRONLY|O_CREAT|O_APPEND);           
+              write(fd_out,pkt.frames[i],sps);  
+              close(fd_out);
+#endif
+            if(pkt.length > 0)
+                res = atrac3_decode_frame(&rmctx,&q, outbuf, &datasize, pkt.frames[i] , rmctx.block_align);
+            rmctx.frame_number++;
+            res = write(fd_dec,outbuf,datasize);                  
+        }
+        packet_count -= rmctx.audio_pkt_cnt;
+        rmctx.audio_pkt_cnt = 0;
+    }
+    atrac3_decode_close(&q);
+    close_wav(fd_dec, &rmctx, &q);
+    close(fd);
+  return 0;
+}
diff --git a/apps/codecs/libatrac/atrac3data.h b/apps/codecs/libatrac/atrac3data.h
new file mode 100644
index 0000000000..e71dc35560
--- /dev/null
+++ b/apps/codecs/libatrac/atrac3data.h
@@ -0,0 +1,144 @@
+/*
+ * Atrac 3 compatible decoder data
+ * Copyright (c) 2006-2007 Maxim Poliakovski
+ * Copyright (c) 2006-2007 Benjamin Larsson
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/**
+ * @file libavcodec/atrac3data.h
+ * Atrac 3 AKA RealAudio 8 compatible decoder data
+ */
+#ifndef AVCODEC_ATRAC3DATA_H
+#define AVCODEC_ATRAC3DATA_H
+#include <stdint.h>
+/* VLC tables */
+static const uint8_t huffcode1[9] = {
+  0x0,0x4,0x5,0xC,0xD,0x1C,0x1D,0x1E,0x1F,
+};
+static const uint8_t huffbits1[9] = {
+  1,3,3,4,4,5,5,5,5,
+};
+static const uint8_t huffcode2[5] = {
+  0x0,0x4,0x5,0x6,0x7,
+};
+static const uint8_t huffbits2[5] = {
+  1,3,3,3,3,
+};
+static const uint8_t huffcode3[7] = {
+0x0,0x4,0x5,0xC,0xD,0xE,0xF,
+};
+static const uint8_t huffbits3[7] = {
+  1,3,3,4,4,4,4,
+};
+static const uint8_t huffcode4[9] = {
+  0x0,0x4,0x5,0xC,0xD,0x1C,0x1D,0x1E,0x1F,
+};
+static const uint8_t huffbits4[9] = {
+  1,3,3,4,4,5,5,5,5,
+};
+static const uint8_t huffcode5[15] = {
+  0x0,0x2,0x3,0x8,0x9,0xA,0xB,0x1C,0x1D,0x3C,0x3D,0x3E,0x3F,0xC,0xD,
+};
+static const uint8_t huffbits5[15] = {
+  2,3,3,4,4,4,4,5,5,6,6,6,6,4,4
+};
+static const uint8_t huffcode6[31] = {
+  0x0,0x2,0x3,0x4,0x5,0x6,0x7,0x14,0x15,0x16,0x17,0x18,0x19,0x34,0x35,
+  0x36,0x37,0x38,0x39,0x3A,0x3B,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,0x8,0x9,
+};
+static const uint8_t huffbits6[31] = {
+  3,4,4,4,4,4,4,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,4,4
+};
+static const uint8_t huffcode7[63] = {
+  0x0,0x8,0x9,0xA,0xB,0xC,0xD,0xE,0xF,0x10,0x11,0x24,0x25,0x26,0x27,0x28,
+  0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,0x30,0x31,0x32,0x33,0x68,0x69,0x6A,0x6B,0x6C,
+  0x6D,0x6E,0x6F,0x70,0x71,0x72,0x73,0x74,0x75,0xEC,0xED,0xEE,0xEF,0xF0,0xF1,0xF2,
+  0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,0x2,0x3,
+};
+static const uint8_t huffbits7[63] = {
+  3,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,
+  7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,4,4
+};
+static const uint8_t huff_tab_sizes[7] = {
+  9, 5, 7, 9, 15, 31, 63,
+};
+static const uint8_t* const huff_codes[7] = {
+  huffcode1,huffcode2,huffcode3,huffcode4,huffcode5,huffcode6,huffcode7,
+};
+static const uint8_t* const huff_bits[7] = {
+  huffbits1,huffbits2,huffbits3,huffbits4,huffbits5,huffbits6,huffbits7,
+};
+static const uint16_t atrac3_vlc_offs[] = {
+    0,512,1024,1536,2048,2560,3072,3584,4096
+};
+/* selector tables */
+static const uint8_t CLCLengthTab[8] = {0, 4, 3, 3, 4, 4, 5, 6};
+static const int8_t seTab_0[4] = {0, 1, -2, -1};
+static const int8_t decTable1[18] = {0,0, 0,1, 0,-1, 1,0, -1,0, 1,1, 1,-1, -1,1, -1,-1};
+/* tables for the scalefactor decoding */
+static const float iMaxQuant[8] = {
+  0.0, 1.0/1.5, 1.0/2.5, 1.0/3.5, 1.0/4.5, 1.0/7.5, 1.0/15.5, 1.0/31.5
+};
+static const uint16_t subbandTab[33] = {
+  0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224,
+  256, 288, 320, 352, 384, 416, 448, 480, 512, 576, 640, 704, 768, 896, 1024
+};
+/* transform data */
+static const float qmf_48tap_half[24] = {
+   -0.00001461907, -0.00009205479, -0.000056157569, 0.00030117269,
+    0.0002422519,-0.00085293897, -0.0005205574, 0.0020340169,
+    0.00078333891, -0.0042153862, -0.00075614988, 0.0078402944,
+   -0.000061169922, -0.01344162, 0.0024626821, 0.021736089,
+   -0.007801671, -0.034090221, 0.01880949, 0.054326009,
+   -0.043596379, -0.099384367, 0.13207909, 0.46424159
+};
+/* joint stereo related tables */
+static const float matrixCoeffs[8] = {0.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0};
+#endif /* AVCODEC_ATRAC3DATA_H */
diff --git a/apps/codecs/libatrac/avcodec.h b/apps/codecs/libatrac/avcodec.h
new file mode 100644
index 0000000000..ed1bdecfce
--- /dev/null
+++ b/apps/codecs/libatrac/avcodec.h
@@ -0,0 +1,3433 @@
+/*
+ * copyright (c) 2001 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef AVCODEC_AVCODEC_H
+#define AVCODEC_AVCODEC_H
+/**
+ * @file libavcodec/avcodec.h
+ * external API header
+ */
+#include <errno.h>
+#include "libavutil/avutil.h"
+#define LIBAVCODEC_VERSION_MAJOR 52
+#define LIBAVCODEC_VERSION_MINOR 22
+#define LIBAVCODEC_VERSION_MICRO  0
+#define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
+                                               LIBAVCODEC_VERSION_MINOR, \
+                                               LIBAVCODEC_VERSION_MICRO)
+#define LIBAVCODEC_VERSION      AV_VERSION(LIBAVCODEC_VERSION_MAJOR,    \
+                                           LIBAVCODEC_VERSION_MINOR,    \
+                                           LIBAVCODEC_VERSION_MICRO)
+#define LIBAVCODEC_BUILD        LIBAVCODEC_VERSION_INT
+#define LIBAVCODEC_IDENT        "Lavc" AV_STRINGIFY(LIBAVCODEC_VERSION)
+#define AV_NOPTS_VALUE          INT64_C(0x8000000000000000)
+#define AV_TIME_BASE            1000000
+#define AV_TIME_BASE_Q          (AVRational){1, AV_TIME_BASE}
+/**
+ * Identifies the syntax and semantics of the bitstream.
+ * The principle is roughly:
+ * Two decoders with the same ID can decode the same streams.
+ * Two encoders with the same ID can encode compatible streams.
+ * There may be slight deviations from the principle due to implementation
+ * details.
+ *
+ * If you add a codec ID to this list, add it so that
+ * 1. no value of a existing codec ID changes (that would break ABI),
+ * 2. it is as close as possible to similar codecs.
+ */
+enum CodecID {
+    CODEC_ID_NONE,
+    /* video codecs */
+    CODEC_ID_MPEG1VIDEO,
+    CODEC_ID_MPEG2VIDEO, ///< preferred ID for MPEG-1/2 video decoding
+    CODEC_ID_MPEG2VIDEO_XVMC,
+    CODEC_ID_H261,
+    CODEC_ID_H263,
+    CODEC_ID_RV10,
+    CODEC_ID_RV20,
+    CODEC_ID_MJPEG,
+    CODEC_ID_MJPEGB,
+    CODEC_ID_LJPEG,
+    CODEC_ID_SP5X,
+    CODEC_ID_JPEGLS,
+    CODEC_ID_MPEG4,
+    CODEC_ID_RAWVIDEO,
+    CODEC_ID_MSMPEG4V1,
+    CODEC_ID_MSMPEG4V2,
+    CODEC_ID_MSMPEG4V3,
+    CODEC_ID_WMV1,
+    CODEC_ID_WMV2,
+    CODEC_ID_H263P,
+    CODEC_ID_H263I,
+    CODEC_ID_FLV1,
+    CODEC_ID_SVQ1,
+    CODEC_ID_SVQ3,
+    CODEC_ID_DVVIDEO,
+    CODEC_ID_HUFFYUV,
+    CODEC_ID_CYUV,
+    CODEC_ID_H264,
+    CODEC_ID_INDEO3,
+    CODEC_ID_VP3,
+    CODEC_ID_THEORA,
+    CODEC_ID_ASV1,
+    CODEC_ID_ASV2,
+    CODEC_ID_FFV1,
+    CODEC_ID_4XM,
+    CODEC_ID_VCR1,
+    CODEC_ID_CLJR,
+    CODEC_ID_MDEC,
+    CODEC_ID_ROQ,
+    CODEC_ID_INTERPLAY_VIDEO,
+    CODEC_ID_XAN_WC3,
+    CODEC_ID_XAN_WC4,
+    CODEC_ID_RPZA,
+    CODEC_ID_CINEPAK,
+    CODEC_ID_WS_VQA,
+    CODEC_ID_MSRLE,
+    CODEC_ID_MSVIDEO1,
+    CODEC_ID_IDCIN,
+    CODEC_ID_8BPS,
+    CODEC_ID_SMC,
+    CODEC_ID_FLIC,
+    CODEC_ID_TRUEMOTION1,
+    CODEC_ID_VMDVIDEO,
+    CODEC_ID_MSZH,
+    CODEC_ID_ZLIB,
+    CODEC_ID_QTRLE,
+    CODEC_ID_SNOW,
+    CODEC_ID_TSCC,
+    CODEC_ID_ULTI,
+    CODEC_ID_QDRAW,
+    CODEC_ID_VIXL,
+    CODEC_ID_QPEG,
+    CODEC_ID_XVID,
+    CODEC_ID_PNG,
+    CODEC_ID_PPM,
+    CODEC_ID_PBM,
+    CODEC_ID_PGM,
+    CODEC_ID_PGMYUV,
+    CODEC_ID_PAM,
+    CODEC_ID_FFVHUFF,
+    CODEC_ID_RV30,
+    CODEC_ID_RV40,
+    CODEC_ID_VC1,
+    CODEC_ID_WMV3,
+    CODEC_ID_LOCO,
+    CODEC_ID_WNV1,
+    CODEC_ID_AASC,
+    CODEC_ID_INDEO2,
+    CODEC_ID_FRAPS,
+    CODEC_ID_TRUEMOTION2,
+    CODEC_ID_BMP,
+    CODEC_ID_CSCD,
+    CODEC_ID_MMVIDEO,
+    CODEC_ID_ZMBV,
+    CODEC_ID_AVS,
+    CODEC_ID_SMACKVIDEO,
+    CODEC_ID_NUV,
+    CODEC_ID_KMVC,
+    CODEC_ID_FLASHSV,
+    CODEC_ID_CAVS,
+    CODEC_ID_JPEG2000,
+    CODEC_ID_VMNC,
+    CODEC_ID_VP5,
+    CODEC_ID_VP6,
+    CODEC_ID_VP6F,
+    CODEC_ID_TARGA,
+    CODEC_ID_DSICINVIDEO,
+    CODEC_ID_TIERTEXSEQVIDEO,
+    CODEC_ID_TIFF,
+    CODEC_ID_GIF,
+    CODEC_ID_FFH264,
+    CODEC_ID_DXA,
+    CODEC_ID_DNXHD,
+    CODEC_ID_THP,
+    CODEC_ID_SGI,
+    CODEC_ID_C93,
+    CODEC_ID_BETHSOFTVID,
+    CODEC_ID_PTX,
+    CODEC_ID_TXD,
+    CODEC_ID_VP6A,
+    CODEC_ID_AMV,
+    CODEC_ID_VB,
+    CODEC_ID_PCX,
+    CODEC_ID_SUNRAST,
+    CODEC_ID_INDEO4,
+    CODEC_ID_INDEO5,
+    CODEC_ID_MIMIC,
+    CODEC_ID_RL2,
+    CODEC_ID_8SVX_EXP,
+    CODEC_ID_8SVX_FIB,
+    CODEC_ID_ESCAPE124,
+    CODEC_ID_DIRAC,
+    CODEC_ID_BFI,
+    CODEC_ID_CMV,
+    CODEC_ID_MOTIONPIXELS,
+    CODEC_ID_TGV,
+    CODEC_ID_TGQ,
+    CODEC_ID_TQI,
+    /* various PCM "codecs" */
+    CODEC_ID_PCM_S16LE= 0x10000,
+    CODEC_ID_PCM_S16BE,
+    CODEC_ID_PCM_U16LE,
+    CODEC_ID_PCM_U16BE,
+    CODEC_ID_PCM_S8,
+    CODEC_ID_PCM_U8,
+    CODEC_ID_PCM_MULAW,
+    CODEC_ID_PCM_ALAW,
+    CODEC_ID_PCM_S32LE,
+    CODEC_ID_PCM_S32BE,
+    CODEC_ID_PCM_U32LE,
+    CODEC_ID_PCM_U32BE,
+    CODEC_ID_PCM_S24LE,
+    CODEC_ID_PCM_S24BE,
+    CODEC_ID_PCM_U24LE,
+    CODEC_ID_PCM_U24BE,
+    CODEC_ID_PCM_S24DAUD,
+    CODEC_ID_PCM_ZORK,
+    CODEC_ID_PCM_S16LE_PLANAR,
+    CODEC_ID_PCM_DVD,
+    CODEC_ID_PCM_F32BE,
+    CODEC_ID_PCM_F32LE,
+    CODEC_ID_PCM_F64BE,
+    CODEC_ID_PCM_F64LE,
+    /* various ADPCM codecs */
+    CODEC_ID_ADPCM_IMA_QT= 0x11000,
+    CODEC_ID_ADPCM_IMA_WAV,
+    CODEC_ID_ADPCM_IMA_DK3,
+    CODEC_ID_ADPCM_IMA_DK4,
+    CODEC_ID_ADPCM_IMA_WS,
+    CODEC_ID_ADPCM_IMA_SMJPEG,
+    CODEC_ID_ADPCM_MS,
+    CODEC_ID_ADPCM_4XM,
+    CODEC_ID_ADPCM_XA,
+    CODEC_ID_ADPCM_ADX,
+    CODEC_ID_ADPCM_EA,
+    CODEC_ID_ADPCM_G726,
+    CODEC_ID_ADPCM_CT,
+    CODEC_ID_ADPCM_SWF,
+    CODEC_ID_ADPCM_YAMAHA,
+    CODEC_ID_ADPCM_SBPRO_4,
+    CODEC_ID_ADPCM_SBPRO_3,
+    CODEC_ID_ADPCM_SBPRO_2,
+    CODEC_ID_ADPCM_THP,
+    CODEC_ID_ADPCM_IMA_AMV,
+    CODEC_ID_ADPCM_EA_R1,
+    CODEC_ID_ADPCM_EA_R3,
+    CODEC_ID_ADPCM_EA_R2,
+    CODEC_ID_ADPCM_IMA_EA_SEAD,
+    CODEC_ID_ADPCM_IMA_EA_EACS,
+    CODEC_ID_ADPCM_EA_XAS,
+    CODEC_ID_ADPCM_EA_MAXIS_XA,
+    CODEC_ID_ADPCM_IMA_ISS,
+    /* AMR */
+    CODEC_ID_AMR_NB= 0x12000,
+    CODEC_ID_AMR_WB,
+    /* RealAudio codecs*/
+    CODEC_ID_RA_144= 0x13000,
+    CODEC_ID_RA_288,
+    /* various DPCM codecs */
+    CODEC_ID_ROQ_DPCM= 0x14000,
+    CODEC_ID_INTERPLAY_DPCM,
+    CODEC_ID_XAN_DPCM,
+    CODEC_ID_SOL_DPCM,
+    /* audio codecs */
+    CODEC_ID_MP2= 0x15000,
+    CODEC_ID_MP3, ///< preferred ID for decoding MPEG audio layer 1, 2 or 3
+    CODEC_ID_AAC,
+    CODEC_ID_AC3,
+    CODEC_ID_DTS,
+    CODEC_ID_VORBIS,
+    CODEC_ID_DVAUDIO,
+    CODEC_ID_WMAV1,
+    CODEC_ID_WMAV2,
+    CODEC_ID_MACE3,
+    CODEC_ID_MACE6,
+    CODEC_ID_VMDAUDIO,
+    CODEC_ID_SONIC,
+    CODEC_ID_SONIC_LS,
+    CODEC_ID_FLAC,
+    CODEC_ID_MP3ADU,
+    CODEC_ID_MP3ON4,
+    CODEC_ID_SHORTEN,
+    CODEC_ID_ALAC,
+    CODEC_ID_WESTWOOD_SND1,
+    CODEC_ID_GSM, ///< as in Berlin toast format
+    CODEC_ID_QDM2,
+    CODEC_ID_COOK,
+    CODEC_ID_TRUESPEECH,
+    CODEC_ID_TTA,
+    CODEC_ID_SMACKAUDIO,
+    CODEC_ID_QCELP,
+    CODEC_ID_WAVPACK,
+    CODEC_ID_DSICINAUDIO,
+    CODEC_ID_IMC,
+    CODEC_ID_MUSEPACK7,
+    CODEC_ID_MLP,
+    CODEC_ID_GSM_MS, /* as found in WAV */
+    CODEC_ID_ATRAC3,
+    CODEC_ID_VOXWARE,
+    CODEC_ID_APE,
+    CODEC_ID_NELLYMOSER,
+    CODEC_ID_MUSEPACK8,
+    CODEC_ID_SPEEX,
+    CODEC_ID_WMAVOICE,
+    CODEC_ID_WMAPRO,
+    CODEC_ID_WMALOSSLESS,
+    CODEC_ID_ATRAC3P,
+    CODEC_ID_EAC3,
+    CODEC_ID_SIPR,
+    CODEC_ID_MP1,
+    CODEC_ID_TWINVQ,
+    CODEC_ID_TRUEHD,
+    /* subtitle codecs */
+    CODEC_ID_DVD_SUBTITLE= 0x17000,
+    CODEC_ID_DVB_SUBTITLE,
+    CODEC_ID_TEXT,  ///< raw UTF-8 text
+    CODEC_ID_XSUB,
+    CODEC_ID_SSA,
+    CODEC_ID_MOV_TEXT,
+    /* other specific kind of codecs (generally used for attachments) */
+    CODEC_ID_TTF= 0x18000,
+    CODEC_ID_PROBE= 0x19000, ///< codec_id is not known (like CODEC_ID_NONE) but lavf should attempt to identify it
+    CODEC_ID_MPEG2TS= 0x20000, /**< _FAKE_ codec to indicate a raw MPEG-2 TS
+                                * stream (only used by libavformat) */
+};
+enum CodecType {
+    CODEC_TYPE_UNKNOWN = -1,
+    CODEC_TYPE_VIDEO,
+    CODEC_TYPE_AUDIO,
+    CODEC_TYPE_DATA,
+    CODEC_TYPE_SUBTITLE,
+    CODEC_TYPE_ATTACHMENT,
+    CODEC_TYPE_NB
+};
+/**
+ * all in native-endian format
+ */
+enum SampleFormat {
+    SAMPLE_FMT_NONE = -1,
+    SAMPLE_FMT_U8,              ///< unsigned 8 bits
+    SAMPLE_FMT_S16,             ///< signed 16 bits
+    SAMPLE_FMT_S32,             ///< signed 32 bits
+    SAMPLE_FMT_FLT,             ///< float
+    SAMPLE_FMT_DBL,             ///< double
+    SAMPLE_FMT_NB               ///< Number of sample formats. DO NOT USE if dynamically linking to libavcodec
+};
+/* Audio channel masks */
+#define CH_FRONT_LEFT             0x00000001
+#define CH_FRONT_RIGHT            0x00000002
+#define CH_FRONT_CENTER           0x00000004
+#define CH_LOW_FREQUENCY          0x00000008
+#define CH_BACK_LEFT              0x00000010
+#define CH_BACK_RIGHT             0x00000020
+#define CH_FRONT_LEFT_OF_CENTER   0x00000040
+#define CH_FRONT_RIGHT_OF_CENTER  0x00000080
+#define CH_BACK_CENTER            0x00000100
+#define CH_SIDE_LEFT              0x00000200
+#define CH_SIDE_RIGHT             0x00000400
+#define CH_TOP_CENTER             0x00000800
+#define CH_TOP_FRONT_LEFT         0x00001000
+#define CH_TOP_FRONT_CENTER       0x00002000
+#define CH_TOP_FRONT_RIGHT        0x00004000
+#define CH_TOP_BACK_LEFT          0x00008000
+#define CH_TOP_BACK_CENTER        0x00010000
+#define CH_TOP_BACK_RIGHT         0x00020000
+#define CH_STEREO_LEFT            0x20000000  ///< Stereo downmix.
+#define CH_STEREO_RIGHT           0x40000000  ///< See CH_STEREO_LEFT.
+/* Audio channel convenience macros */
+#define CH_LAYOUT_MONO              (CH_FRONT_CENTER)
+#define CH_LAYOUT_STEREO            (CH_FRONT_LEFT|CH_FRONT_RIGHT)
+#define CH_LAYOUT_SURROUND          (CH_LAYOUT_STEREO|CH_FRONT_CENTER)
+#define CH_LAYOUT_QUAD              (CH_LAYOUT_STEREO|CH_BACK_LEFT|CH_BACK_RIGHT)
+#define CH_LAYOUT_5POINT0           (CH_LAYOUT_SURROUND|CH_SIDE_LEFT|CH_SIDE_RIGHT)
+#define CH_LAYOUT_5POINT1           (CH_LAYOUT_5POINT0|CH_LOW_FREQUENCY)
+#define CH_LAYOUT_7POINT1           (CH_LAYOUT_5POINT1|CH_BACK_LEFT|CH_BACK_RIGHT)
+#define CH_LAYOUT_7POINT1_WIDE      (CH_LAYOUT_SURROUND|CH_LOW_FREQUENCY|\
+                                          CH_BACK_LEFT|CH_BACK_RIGHT|\
+                                          CH_FRONT_LEFT_OF_CENTER|CH_FRONT_RIGHT_OF_CENTER)
+#define CH_LAYOUT_STEREO_DOWNMIX    (CH_STEREO_LEFT|CH_STEREO_RIGHT)
+/* in bytes */
+#define AVCODEC_MAX_AUDIO_FRAME_SIZE 192000 // 1 second of 48khz 32bit audio
+/**
+ * Required number of additionally allocated bytes at the end of the input bitstream for decoding.
+ * This is mainly needed because some optimized bitstream readers read
+ * 32 or 64 bit at once and could read over the end.<br>
+ * Note: If the first 23 bits of the additional bytes are not 0, then damaged
+ * MPEG bitstreams could cause overread and segfault.
+ */
+#define FF_INPUT_BUFFER_PADDING_SIZE 8
+/**
+ * minimum encoding buffer size
+ * Used to avoid some checks during header writing.
+ */
+#define FF_MIN_BUFFER_SIZE 16384
+#if 0/* MT : DELETE THIS LINE.*/
+/**
+ * motion estimation type.
+ */
+enum Motion_Est_ID {
+    ME_ZERO = 1,    ///< no search, that is use 0,0 vector whenever one is needed
+    ME_FULL,
+    ME_LOG,
+    ME_PHODS,
+    ME_EPZS,        ///< enhanced predictive zonal search
+    ME_X1,          ///< reserved for experiments
+    ME_HEX,         ///< hexagon based search
+    ME_UMH,         ///< uneven multi-hexagon search
+    ME_ITER,        ///< iterative search
+    ME_TESA,        ///< transformed exhaustive search algorithm
+};
+enum AVDiscard{
+    /* We leave some space between them for extensions (drop some
+     * keyframes for intra-only or drop just some bidir frames). */
+    AVDISCARD_NONE   =-16, ///< discard nothing
+    AVDISCARD_DEFAULT=  0, ///< discard useless packets like 0 size packets in avi
+    AVDISCARD_NONREF =  8, ///< discard all non reference
+    AVDISCARD_BIDIR  = 16, ///< discard all bidirectional frames
+    AVDISCARD_NONKEY = 32, ///< discard all frames except keyframes
+    AVDISCARD_ALL    = 48, ///< discard all
+};
+typedef struct RcOverride{
+    int start_frame;
+    int end_frame;
+    int qscale; // If this is 0 then quality_factor will be used instead.
+    float quality_factor;
+} RcOverride;
+#define FF_MAX_B_FRAMES 16
+/* encoding support
+   These flags can be passed in AVCodecContext.flags before initialization.
+   Note: Not everything is supported yet.
+*/
+#define CODEC_FLAG_QSCALE 0x0002  ///< Use fixed qscale.
+#define CODEC_FLAG_4MV    0x0004  ///< 4 MV per MB allowed / advanced prediction for H.263.
+#define CODEC_FLAG_QPEL   0x0010  ///< Use qpel MC.
+#define CODEC_FLAG_GMC    0x0020  ///< Use GMC.
+#define CODEC_FLAG_MV0    0x0040  ///< Always try a MB with MV=<0,0>.
+#define CODEC_FLAG_PART   0x0080  ///< Use data partitioning.
+/**
+ * The parent program guarantees that the input for B-frames containing
+ * streams is not written to for at least s->max_b_frames+1 frames, if
+ * this is not set the input will be copied.
+ */
+#define CODEC_FLAG_INPUT_PRESERVED 0x0100
+#define CODEC_FLAG_PASS1           0x0200   ///< Use internal 2pass ratecontrol in first pass mode.
+#define CODEC_FLAG_PASS2           0x0400   ///< Use internal 2pass ratecontrol in second pass mode.
+#define CODEC_FLAG_EXTERN_HUFF     0x1000   ///< Use external Huffman table (for MJPEG).
+#define CODEC_FLAG_GRAY            0x2000   ///< Only decode/encode grayscale.
+#define CODEC_FLAG_EMU_EDGE        0x4000   ///< Don't draw edges.
+#define CODEC_FLAG_PSNR            0x8000   ///< error[?] variables will be set during encoding.
+#define CODEC_FLAG_TRUNCATED       0x00010000 /** Input bitstream might be truncated at a random
+                                                  location instead of only at frame boundaries. */
+#define CODEC_FLAG_NORMALIZE_AQP  0x00020000 ///< Normalize adaptive quantization.
+#define CODEC_FLAG_INTERLACED_DCT 0x00040000 ///< Use interlaced DCT.
+#define CODEC_FLAG_LOW_DELAY      0x00080000 ///< Force low delay.
+#define CODEC_FLAG_ALT_SCAN       0x00100000 ///< Use alternate scan.
+#define CODEC_FLAG_GLOBAL_HEADER  0x00400000 ///< Place global headers in extradata instead of every keyframe.
+#define CODEC_FLAG_BITEXACT       0x00800000 ///< Use only bitexact stuff (except (I)DCT).
+/* Fx : Flag for h263+ extra options */
+#define CODEC_FLAG_AC_PRED        0x01000000 ///< H.263 advanced intra coding / MPEG-4 AC prediction
+#define CODEC_FLAG_H263P_UMV      0x02000000 ///< unlimited motion vector
+#define CODEC_FLAG_CBP_RD         0x04000000 ///< Use rate distortion optimization for cbp.
+#define CODEC_FLAG_QP_RD          0x08000000 ///< Use rate distortion optimization for qp selectioon.
+#define CODEC_FLAG_H263P_AIV      0x00000008 ///< H.263 alternative inter VLC
+#define CODEC_FLAG_OBMC           0x00000001 ///< OBMC
+#define CODEC_FLAG_LOOP_FILTER    0x00000800 ///< loop filter
+#define CODEC_FLAG_H263P_SLICE_STRUCT 0x10000000
+#define CODEC_FLAG_INTERLACED_ME  0x20000000 ///< interlaced motion estimation
+#define CODEC_FLAG_SVCD_SCAN_OFFSET 0x40000000 ///< Will reserve space for SVCD scan offset user data.
+#define CODEC_FLAG_CLOSED_GOP     0x80000000
+#define CODEC_FLAG2_FAST          0x00000001 ///< Allow non spec compliant speedup tricks.
+#define CODEC_FLAG2_STRICT_GOP    0x00000002 ///< Strictly enforce GOP size.
+#define CODEC_FLAG2_NO_OUTPUT     0x00000004 ///< Skip bitstream encoding.
+#define CODEC_FLAG2_LOCAL_HEADER  0x00000008 ///< Place global headers at every keyframe instead of in extradata.
+#define CODEC_FLAG2_BPYRAMID      0x00000010 ///< H.264 allow B-frames to be used as references.
+#define CODEC_FLAG2_WPRED         0x00000020 ///< H.264 weighted biprediction for B-frames
+#define CODEC_FLAG2_MIXED_REFS    0x00000040 ///< H.264 one reference per partition, as opposed to one reference per macroblock
+#define CODEC_FLAG2_8X8DCT        0x00000080 ///< H.264 high profile 8x8 transform
+#define CODEC_FLAG2_FASTPSKIP     0x00000100 ///< H.264 fast pskip
+#define CODEC_FLAG2_AUD           0x00000200 ///< H.264 access unit delimiters
+#define CODEC_FLAG2_BRDO          0x00000400 ///< B-frame rate-distortion optimization
+#define CODEC_FLAG2_INTRA_VLC     0x00000800 ///< Use MPEG-2 intra VLC table.
+#define CODEC_FLAG2_MEMC_ONLY     0x00001000 ///< Only do ME/MC (I frames -> ref, P frame -> ME+MC).
+#define CODEC_FLAG2_DROP_FRAME_TIMECODE 0x00002000 ///< timecode is in drop frame format.
+#define CODEC_FLAG2_SKIP_RD       0x00004000 ///< RD optimal MB level residual skipping
+#define CODEC_FLAG2_CHUNKS        0x00008000 ///< Input bitstream might be truncated at a packet boundaries instead of only at frame boundaries.
+#define CODEC_FLAG2_NON_LINEAR_QUANT 0x00010000 ///< Use MPEG-2 nonlinear quantizer.
+#define CODEC_FLAG2_BIT_RESERVOIR 0x00020000 ///< Use a bit reservoir when encoding if possible
+/* Unsupported options :
+ *              Syntax Arithmetic coding (SAC)
+ *              Reference Picture Selection
+ *              Independent Segment Decoding */
+/* /Fx */
+/* codec capabilities */
+#define CODEC_CAP_DRAW_HORIZ_BAND 0x0001 ///< Decoder can use draw_horiz_band callback.
+/**
+ * Codec uses get_buffer() for allocating buffers.
+ * direct rendering method 1
+ */
+#define CODEC_CAP_DR1             0x0002
+/* If 'parse_only' field is true, then avcodec_parse_frame() can be used. */
+#define CODEC_CAP_PARSE_ONLY      0x0004
+#define CODEC_CAP_TRUNCATED       0x0008
+/* Codec can export data for HW decoding (XvMC). */
+#define CODEC_CAP_HWACCEL         0x0010
+/**
+ * Codec has a nonzero delay and needs to be fed with NULL at the end to get the delayed data.
+ * If this is not set, the codec is guaranteed to never be fed with NULL data.
+ */
+#define CODEC_CAP_DELAY           0x0020
+/**
+ * Codec can be fed a final frame with a smaller size.
+ * This can be used to prevent truncation of the last audio samples.
+ */
+#define CODEC_CAP_SMALL_LAST_FRAME 0x0040
+/**
+ * Codec can export data for HW decoding (VDPAU).
+ */
+#define CODEC_CAP_HWACCEL_VDPAU    0x0080
+//The following defines may change, don't expect compatibility if you use them.
+#define MB_TYPE_INTRA4x4   0x0001
+#define MB_TYPE_INTRA16x16 0x0002 //FIXME H.264-specific
+#define MB_TYPE_INTRA_PCM  0x0004 //FIXME H.264-specific
+#define MB_TYPE_16x16      0x0008
+#define MB_TYPE_16x8       0x0010
+#define MB_TYPE_8x16       0x0020
+#define MB_TYPE_8x8        0x0040
+#define MB_TYPE_INTERLACED 0x0080
+#define MB_TYPE_DIRECT2    0x0100 //FIXME
+#define MB_TYPE_ACPRED     0x0200
+#define MB_TYPE_GMC        0x0400
+#define MB_TYPE_SKIP       0x0800
+#define MB_TYPE_P0L0       0x1000
+#define MB_TYPE_P1L0       0x2000
+#define MB_TYPE_P0L1       0x4000
+#define MB_TYPE_P1L1       0x8000
+#define MB_TYPE_L0         (MB_TYPE_P0L0 | MB_TYPE_P1L0)
+#define MB_TYPE_L1         (MB_TYPE_P0L1 | MB_TYPE_P1L1)
+#define MB_TYPE_L0L1       (MB_TYPE_L0   | MB_TYPE_L1)
+#define MB_TYPE_QUANT      0x00010000
+#define MB_TYPE_CBP        0x00020000
+//Note bits 24-31 are reserved for codec specific use (h264 ref0, mpeg1 0mv, ...)
+/**
+ * Pan Scan area.
+ * This specifies the area which should be displayed.
+ * Note there may be multiple such areas for one frame.
+ */
+typedef struct AVPanScan{
+    /**
+     * id
+     * - encoding: Set by user.
+     * - decoding: Set by libavcodec.
+     */
+    int id;
+    /**
+     * width and height in 1/16 pel
+     * - encoding: Set by user.
+     * - decoding: Set by libavcodec.
+     */
+    int width;
+    int height;
+    /**
+     * position of the top left corner in 1/16 pel for up to 3 fields/frames
+     * - encoding: Set by user.
+     * - decoding: Set by libavcodec.
+     */
+    int16_t position[3][2];
+}AVPanScan;
+#define FF_COMMON_FRAME \
+    /**\
+     * pointer to the picture planes.\
+     * This might be different from the first allocated byte\
+     * - encoding: \
+     * - decoding: \
+     */\
+    uint8_t *data[4];\
+    int linesize[4];\
+    /**\
+     * pointer to the first allocated byte of the picture. Can be used in get_buffer/release_buffer.\
+     * This isn't used by libavcodec unless the default get/release_buffer() is used.\
+     * - encoding: \
+     * - decoding: \
+     */\
+    uint8_t *base[4];\
+    /**\
+     * 1 -> keyframe, 0-> not\
+     * - encoding: Set by libavcodec.\
+     * - decoding: Set by libavcodec.\
+     */\
+    int key_frame;\
+\
+    /**\
+     * Picture type of the frame, see ?_TYPE below.\
+     * - encoding: Set by libavcodec. for coded_picture (and set by user for input).\
+     * - decoding: Set by libavcodec.\
+     */\
+    int pict_type;\
+\
+    /**\
+     * presentation timestamp in time_base units (time when frame should be shown to user)\
+     * If AV_NOPTS_VALUE then frame_rate = 1/time_base will be assumed.\
+     * - encoding: MUST be set by user.\
+     * - decoding: Set by libavcodec.\
+     */\
+    int64_t pts;\
+\
+    /**\
+     * picture number in bitstream order\
+     * - encoding: set by\
+     * - decoding: Set by libavcodec.\
+     */\
+    int coded_picture_number;\
+    /**\
+     * picture number in display order\
+     * - encoding: set by\
+     * - decoding: Set by libavcodec.\
+     */\
+    int display_picture_number;\
+\
+    /**\
+     * quality (between 1 (good) and FF_LAMBDA_MAX (bad)) \
+     * - encoding: Set by libavcodec. for coded_picture (and set by user for input).\
+     * - decoding: Set by libavcodec.\
+     */\
+    int quality; \
+\
+    /**\
+     * buffer age (1->was last buffer and dint change, 2->..., ...).\
+     * Set to INT_MAX if the buffer has not been used yet.\
+     * - encoding: unused\
+     * - decoding: MUST be set by get_buffer().\
+     */\
+    int age;\
+\
+    /**\
+     * is this picture used as reference\
+     * The values for this are the same as the MpegEncContext.picture_structure\
+     * variable, that is 1->top field, 2->bottom field, 3->frame/both fields.\
+     * Set to 4 for delayed, non-reference frames.\
+     * - encoding: unused\
+     * - decoding: Set by libavcodec. (before get_buffer() call)).\
+     */\
+    int reference;\
+\
+    /**\
+     * QP table\
+     * - encoding: unused\
+     * - decoding: Set by libavcodec.\
+     */\
+    int8_t *qscale_table;\
+    /**\
+     * QP store stride\
+     * - encoding: unused\
+     * - decoding: Set by libavcodec.\
+     */\
+    int qstride;\
+\
+    /**\
+     * mbskip_table[mb]>=1 if MB didn't change\
+     * stride= mb_width = (width+15)>>4\
+     * - encoding: unused\
+     * - decoding: Set by libavcodec.\
+     */\
+    uint8_t *mbskip_table;\
+\
+    /**\
+     * motion vector table\
+     * @code\
+     * example:\
+     * int mv_sample_log2= 4 - motion_subsample_log2;\
+     * int mb_width= (width+15)>>4;\
+     * int mv_stride= (mb_width << mv_sample_log2) + 1;\
+     * motion_val[direction][x + y*mv_stride][0->mv_x, 1->mv_y];\
+     * @endcode\
+     * - encoding: Set by user.\
+     * - decoding: Set by libavcodec.\
+     */\
+    int16_t (*motion_val[2])[2];\
+\
+    /**\
+     * macroblock type table\
+     * mb_type_base + mb_width + 2\
+     * - encoding: Set by user.\
+     * - decoding: Set by libavcodec.\
+     */\
+    uint32_t *mb_type;\
+\
+    /**\
+     * log2 of the size of the block which a single vector in motion_val represents: \
+     * (4->16x16, 3->8x8, 2-> 4x4, 1-> 2x2)\
+     * - encoding: unused\
+     * - decoding: Set by libavcodec.\
+     */\
+    uint8_t motion_subsample_log2;\
+\
+    /**\
+     * for some private data of the user\
+     * - encoding: unused\
+     * - decoding: Set by user.\
+     */\
+    void *opaque;\
+\
+    /**\
+     * error\
+     * - encoding: Set by libavcodec. if flags&CODEC_FLAG_PSNR.\
+     * - decoding: unused\
+     */\
+    uint64_t error[4];\
+\
+    /**\
+     * type of the buffer (to keep track of who has to deallocate data[*])\
+     * - encoding: Set by the one who allocates it.\
+     * - decoding: Set by the one who allocates it.\
+     * Note: User allocated (direct rendering) & internal buffers cannot coexist currently.\
+     */\
+    int type;\
+    \
+    /**\
+     * When decoding, this signals how much the picture must be delayed.\
+     * extra_delay = repeat_pict / (2*fps)\
+     * - encoding: unused\
+     * - decoding: Set by libavcodec.\
+     */\
+    int repeat_pict;\
+    \
+    /**\
+     * \
+     */\
+    int qscale_type;\
+    \
+    /**\
+     * The content of the picture is interlaced.\
+     * - encoding: Set by user.\
+     * - decoding: Set by libavcodec. (default 0)\
+     */\
+    int interlaced_frame;\
+    \
+    /**\
+     * If the content is interlaced, is top field displayed first.\
+     * - encoding: Set by user.\
+     * - decoding: Set by libavcodec.\
+     */\
+    int top_field_first;\
+    \
+    /**\
+     * Pan scan.\
+     * - encoding: Set by user.\
+     * - decoding: Set by libavcodec.\
+     */\
+    AVPanScan *pan_scan;\
+    \
+    /**\
+     * Tell user application that palette has changed from previous frame.\
+     * - encoding: ??? (no palette-enabled encoder yet)\
+     * - decoding: Set by libavcodec. (default 0).\
+     */\
+    int palette_has_changed;\
+    \
+    /**\
+     * codec suggestion on buffer type if != 0\
+     * - encoding: unused\
+     * - decoding: Set by libavcodec. (before get_buffer() call)).\
+     */\
+    int buffer_hints;\
+\
+    /**\
+     * DCT coefficients\
+     * - encoding: unused\
+     * - decoding: Set by libavcodec.\
+     */\
+    short *dct_coeff;\
+\
+    /**\
+     * motion referece frame index\
+     * - encoding: Set by user.\
+     * - decoding: Set by libavcodec.\
+     */\
+    int8_t *ref_index[2];\
+\
+    /**\
+     * reordered opaque 64bit number (generally a PTS) from AVCodecContext.reordered_opaque\
+     * output in AVFrame.reordered_opaque\
+     * - encoding: unused\
+     * - decoding: Read by user.\
+     */\
+    int64_t reordered_opaque;\
+\
+    /**\
+     * hardware accelerator private data (FFmpeg allocated)\
+     * - encoding: unused\
+     * - decoding: Set by libavcodec\
+     */\
+    void *hwaccel_picture_private;\
+#define FF_QSCALE_TYPE_MPEG1 0
+#define FF_QSCALE_TYPE_MPEG2 1
+#define FF_QSCALE_TYPE_H264  2
+#define FF_BUFFER_TYPE_INTERNAL 1
+#define FF_BUFFER_TYPE_USER     2 ///< direct rendering buffers (image is (de)allocated by user)
+#define FF_BUFFER_TYPE_SHARED   4 ///< Buffer from somewhere else; don't deallocate image (data/base), all other tables are not shared.
+#define FF_BUFFER_TYPE_COPY     8 ///< Just a (modified) copy of some other buffer, don't deallocate anything.
+#define FF_I_TYPE  1 ///< Intra
+#define FF_P_TYPE  2 ///< Predicted
+#define FF_B_TYPE  3 ///< Bi-dir predicted
+#define FF_S_TYPE  4 ///< S(GMC)-VOP MPEG4
+#define FF_SI_TYPE 5 ///< Switching Intra
+#define FF_SP_TYPE 6 ///< Switching Predicted
+#define FF_BI_TYPE 7
+#define FF_BUFFER_HINTS_VALID    0x01 // Buffer hints value is meaningful (if 0 ignore).
+#define FF_BUFFER_HINTS_READABLE 0x02 // Codec will read from buffer.
+#define FF_BUFFER_HINTS_PRESERVE 0x04 // User must not alter buffer content.
+#define FF_BUFFER_HINTS_REUSABLE 0x08 // Codec will reuse the buffer (update).
+/**
+ * Audio Video Frame.
+ * New fields can be added to the end of FF_COMMON_FRAME with minor version
+ * bumps.
+ * Removal, reordering and changes to existing fields require a major
+ * version bump. No fields should be added into AVFrame before or after
+ * FF_COMMON_FRAME!
+ * sizeof(AVFrame) must not be used outside libav*.
+ */
+typedef struct AVFrame {
+    FF_COMMON_FRAME
+} AVFrame;
+#endif/* MT : DELETE THIS LINE.*/
+/**
+ * main external API structure.
+ * New fields can be added to the end with minor version bumps.
+ * Removal, reordering and changes to existing fields require a major
+ * version bump.
+ * sizeof(AVCodecContext) must not be used outside libav*.
+ */
+typedef struct AVCodecContext {
+    /**
+     * information on struct for av_log
+     * - set by avcodec_alloc_context
+     */
+    const AVClass *av_class;
+    /**
+     * the average bitrate
+     * - encoding: Set by user; unused for constant quantizer encoding.
+     * - decoding: Set by libavcodec. 0 or some bitrate if this info is available in the stream.
+     */
+    int bit_rate;
+    /**
+     * number of bits the bitstream is allowed to diverge from the reference.
+     *           the reference can be CBR (for CBR pass1) or VBR (for pass2)
+     * - encoding: Set by user; unused for constant quantizer encoding.
+     * - decoding: unused
+     */
+//MT:    int bit_rate_tolerance;
+    /**
+     * CODEC_FLAG_*.
+     * - encoding: Set by user.
+     * - decoding: Set by user.
+     */
+//MT:    int flags;
+    /**
+     * Some codecs need additional format info. It is stored here.
+     * If any muxer uses this then ALL demuxers/parsers AND encoders for the
+     * specific codec MUST set it correctly otherwise stream copy breaks.
+     * In general use of this field by muxers is not recommanded.
+     * - encoding: Set by libavcodec.
+     * - decoding: Set by libavcodec. (FIXME: Is this OK?)
+     */
+//MT:    int sub_id;
+    /**
+     * Motion estimation algorithm used for video coding.
+     * 1 (zero), 2 (full), 3 (log), 4 (phods), 5 (epzs), 6 (x1), 7 (hex),
+     * 8 (umh), 9 (iter), 10 (tesa) [7, 8, 10 are x264 specific, 9 is snow specific]
+     * - encoding: MUST be set by user.
+     * - decoding: unused
+     */
+//MT:    int me_method;
+    /**
+     * some codecs need / can use extradata like Huffman tables.
+     * mjpeg: Huffman tables
+     * rv10: additional flags
+     * mpeg4: global headers (they can be in the bitstream or here)
+     * The allocated memory should be FF_INPUT_BUFFER_PADDING_SIZE bytes larger
+     * than extradata_size to avoid prolems if it is read with the bitstream reader.
+     * The bytewise contents of extradata must not depend on the architecture or CPU endianness.
+     * - encoding: Set/allocated/freed by libavcodec.
+     * - decoding: Set/allocated/freed by user.
+     */
+    uint8_t *extradata;
+    int extradata_size;
+    /**
+     * This is the fundamental unit of time (in seconds) in terms
+     * of which frame timestamps are represented. For fixed-fps content,
+     * timebase should be 1/framerate and timestamp increments should be
+     * identically 1.
+     * - encoding: MUST be set by user.
+     * - decoding: Set by libavcodec.
+     */
+//MT:    AVRational time_base;
+    /* video only */
+    /**
+     * picture width / height.
+     * - encoding: MUST be set by user.
+     * - decoding: Set by libavcodec.
+     * Note: For compatibility it is possible to set this instead of
+     * coded_width/height before decoding.
+     */
+//MT:    int width, height;
+//MT:#define FF_ASPECT_EXTENDED 15
+    /**
+     * the number of pictures in a group of pictures, or 0 for intra_only
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int gop_size;
+    /**
+     * Pixel format, see PIX_FMT_xxx.
+     * - encoding: Set by user.
+     * - decoding: Set by libavcodec.
+     */
+ //MT:   enum PixelFormat pix_fmt;
+    /**
+     * Frame rate emulation. If not zero, the lower layer (i.e. format handler)
+     * has to read frames at native frame rate.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int rate_emu;
+    /**
+     * If non NULL, 'draw_horiz_band' is called by the libavcodec
+     * decoder to draw a horizontal band. It improves cache usage. Not
+     * all codecs can do that. You must check the codec capabilities
+     * beforehand.
+     * The function is also used by hardware acceleration APIs.
+     * It is called at least once during frame decoding to pass
+     * the data needed for hardware render.
+     * In that mode instead of pixel data, AVFrame points to
+     * a structure specific to the acceleration API. The application
+     * reads the structure and can change some fields to indicate progress
+     * or mark state.
+     * - encoding: unused
+     * - decoding: Set by user.
+     * @param height the height of the slice
+     * @param y the y position of the slice
+     * @param type 1->top field, 2->bottom field, 3->frame
+     * @param offset offset into the AVFrame.data from which the slice should be read
+     */
+ //MT:   void (*draw_horiz_band)(struct AVCodecContext *s,
+ //Mt:                           const AVFrame *src, int offset[4],
+ //MT:                           int y, int type, int height);
+    /* audio only */
+    int sample_rate; ///< samples per second
+    int channels;    ///< number of audio channels
+    /**
+     * audio sample format
+     * - encoding: Set by user.
+     * - decoding: Set by libavcodec.
+     */
+    enum SampleFormat sample_fmt;  ///< sample format, currently unused
+    /* The following data should not be initialized. */
+    /**
+     * Samples per packet, initialized when calling 'init'.
+     */
+    int frame_size;
+    int frame_number;   ///< audio or video frame number
+//MT:    int real_pict_num;  ///< Returns the real picture number of previous encoded frame.
+    /**
+     * Number of frames the decoded output will be delayed relative to
+     * the encoded input.
+     * - encoding: Set by libavcodec.
+     * - decoding: unused
+     */
+//MT:    int delay;
+    /* - encoding parameters */
+//MT:    float qcompress;  ///< amount of qscale change between easy & hard scenes (0.0-1.0)
+//MT:    float qblur;      ///< amount of qscale smoothing over time (0.0-1.0)
+    /**
+     * minimum quantizer
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int qmin;
+    /**
+     * maximum quantizer
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int qmax;
+    /**
+     * maximum quantizer difference between frames
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int max_qdiff;
+    /**
+     * maximum number of B-frames between non-B-frames
+     * Note: The output will be delayed by max_b_frames+1 relative to the input.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int max_b_frames;
+    /**
+     * qscale factor between IP and B-frames
+     * If > 0 then the last P-frame quantizer will be used (q= lastp_q*factor+offset).
+     * If < 0 then normal ratecontrol will be done (q= -normal_q*factor+offset).
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    float b_quant_factor;
+    /** obsolete FIXME remove */
+//MT:    int rc_strategy;
+//MT:#define FF_RC_STRATEGY_XVID 1
+//MT:    int b_frame_strategy;
+    /**
+     * hurry up amount
+     * - encoding: unused
+     * - decoding: Set by user. 1-> Skip B-frames, 2-> Skip IDCT/dequant too, 5-> Skip everything except header
+     * @deprecated Deprecated in favor of skip_idct and skip_frame.
+     */
+//MT:    int hurry_up;
+    struct AVCodec *codec;
+    void *priv_data;
+//MT:    int rtp_payload_size;   /* The size of the RTP payload: the coder will  */
+                            /* do its best to deliver a chunk with size     */
+                            /* below rtp_payload_size, the chunk will start */
+                            /* with a start code on some codecs like H.263. */
+                            /* This doesn't take account of any particular  */
+                            /* headers inside the transmitted RTP payload.  */
+    /* The RTP callback: This function is called    */
+    /* every time the encoder has a packet to send. */
+    /* It depends on the encoder if the data starts */
+    /* with a Start Code (it should). H.263 does.   */
+    /* mb_nb contains the number of macroblocks     */
+    /* encoded in the RTP payload.                  */
+//MT:    void (*rtp_callback)(struct AVCodecContext *avctx, void *data, int size, int mb_nb);
+    /* statistics, used for 2-pass encoding */
+//MT:    int mv_bits;
+//MT:    int header_bits;
+//MT:    int i_tex_bits;
+ //MT:   int p_tex_bits;
+//MT:    int i_count;
+ //MT:   int p_count;
+ //MT:   int skip_count;
+ //MT:   int misc_bits;
+    /**
+     * number of bits used for the previously encoded frame
+     * - encoding: Set by libavcodec.
+     * - decoding: unused
+     */
+//MT:    int frame_bits;
+    /**
+     * Private data of the user, can be used to carry app specific stuff.
+     * - encoding: Set by user.
+     * - decoding: Set by user.
+     */
+//MT:    void *opaque;
+    char codec_name[32];
+    enum CodecType codec_type; /* see CODEC_TYPE_xxx */
+    enum CodecID codec_id; /* see CODEC_ID_xxx */
+    /**
+     * fourcc (LSB first, so "ABCD" -> ('D'<<24) + ('C'<<16) + ('B'<<8) + 'A').
+     * This is used to work around some encoder bugs.
+     * A demuxer should set this to what is stored in the field used to identify the codec.
+     * If there are multiple such fields in a container then the demuxer should choose the one
+     * which maximizes the information about the used codec.
+     * If the codec tag field in a container is larger then 32 bits then the demuxer should
+     * remap the longer ID to 32 bits with a table or other structure. Alternatively a new
+     * extra_codec_tag + size could be added but for this a clear advantage must be demonstrated
+     * first.
+     * - encoding: Set by user, if not then the default based on codec_id will be used.
+     * - decoding: Set by user, will be converted to uppercase by libavcodec during init.
+     */
+//MT:    unsigned int codec_tag;
+    /**
+     * Work around bugs in encoders which sometimes cannot be detected automatically.
+     * - encoding: Set by user
+     * - decoding: Set by user
+     */
+//MT:    int workaround_bugs;
+#define FF_BUG_AUTODETECT       1  ///< autodetection
+#define FF_BUG_OLD_MSMPEG4      2
+#define FF_BUG_XVID_ILACE       4
+#define FF_BUG_UMP4             8
+#define FF_BUG_NO_PADDING       16
+#define FF_BUG_AMV              32
+#define FF_BUG_AC_VLC           0  ///< Will be removed, libavcodec can now handle these non-compliant files by default.
+#define FF_BUG_QPEL_CHROMA      64
+#define FF_BUG_STD_QPEL         128
+#define FF_BUG_QPEL_CHROMA2     256
+#define FF_BUG_DIRECT_BLOCKSIZE 512
+#define FF_BUG_EDGE             1024
+#define FF_BUG_HPEL_CHROMA      2048
+#define FF_BUG_DC_CLIP          4096
+#define FF_BUG_MS               8192 ///< Work around various bugs in Microsoft's broken decoders.
+//#define FF_BUG_FAKE_SCALABILITY 16 //Autodetection should work 100%.
+    /**
+     * luma single coefficient elimination threshold
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int luma_elim_threshold;
+    /**
+     * chroma single coeff elimination threshold
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int chroma_elim_threshold;
+    /**
+     * strictly follow the standard (MPEG4, ...).
+     * - encoding: Set by user.
+     * - decoding: Set by user.
+     * Setting this to STRICT or higher means the encoder and decoder will
+     * generally do stupid things. While setting it to inofficial or lower
+     * will mean the encoder might use things that are not supported by all
+     * spec compliant decoders. Decoders make no difference between normal,
+     * inofficial and experimental, that is they always try to decode things
+     * when they can unless they are explicitly asked to behave stupid
+     * (=strictly conform to the specs)
+     */
+//MT:    int strict_std_compliance;
+#define FF_COMPLIANCE_VERY_STRICT   2 ///< Strictly conform to a older more strict version of the spec or reference software.
+#define FF_COMPLIANCE_STRICT        1 ///< Strictly conform to all the things in the spec no matter what consequences.
+#define FF_COMPLIANCE_NORMAL        0
+#define FF_COMPLIANCE_INOFFICIAL   -1 ///< Allow inofficial extensions.
+#define FF_COMPLIANCE_EXPERIMENTAL -2 ///< Allow nonstandardized experimental things.
+    /**
+     * qscale offset between IP and B-frames
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    float b_quant_offset;
+    /**
+     * Error recognization; higher values will detect more errors but may
+     * misdetect some more or less valid parts as errors.
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+//MT:    int error_recognition;
+#define FF_ER_CAREFUL         1
+#define FF_ER_COMPLIANT       2
+#define FF_ER_AGGRESSIVE      3
+#define FF_ER_VERY_AGGRESSIVE 4
+    /**
+     * Called at the beginning of each frame to get a buffer for it.
+     * If pic.reference is set then the frame will be read later by libavcodec.
+     * avcodec_align_dimensions() should be used to find the required width and
+     * height, as they normally need to be rounded up to the next multiple of 16.
+     * - encoding: unused
+     * - decoding: Set by libavcodec., user can override.
+     */
+ //MT:   int (*get_buffer)(struct AVCodecContext *c, AVFrame *pic);
+    /**
+     * Called to release buffers which were allocated with get_buffer.
+     * A released buffer can be reused in get_buffer().
+     * pic.data[*] must be set to NULL.
+     * - encoding: unused
+     * - decoding: Set by libavcodec., user can override.
+     */
+//MT:    void (*release_buffer)(struct AVCodecContext *c, AVFrame *pic);
+    /**
+     * Size of the frame reordering buffer in the decoder.
+     * For MPEG-2 it is 1 IPB or 0 low delay IP.
+     * - encoding: Set by libavcodec.
+     * - decoding: Set by libavcodec.
+     */
+//MT:    int has_b_frames;
+    /**
+     * number of bytes per packet if constant and known or 0
+     * Used by some WAV based audio codecs.
+     */
+    int block_align;
+//MT:    int parse_only; /* - decoding only: If true, only parsing is done
+                       /*(function avcodec_parse_frame()). The frame
+                       data is returned. Only MPEG codecs support this now. */
+    /**
+     * 0-> h263 quant 1-> mpeg quant
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int mpeg_quant;
+    /**
+     * pass1 encoding statistics output buffer
+     * - encoding: Set by libavcodec.
+     * - decoding: unused
+     */
+//MT:    char *stats_out;
+    /**
+     * pass2 encoding statistics input buffer
+     * Concatenated stuff from stats_out of pass1 should be placed here.
+     * - encoding: Allocated/set/freed by user.
+     * - decoding: unused
+     */
+//MT:    char *stats_in;
+    /**
+     * ratecontrol qmin qmax limiting method
+     * 0-> clipping, 1-> use a nice continous function to limit qscale wthin qmin/qmax.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    float rc_qsquish;
+//MT:    float rc_qmod_amp;
+ //MT:   int rc_qmod_freq;
+    /**
+     * ratecontrol override, see RcOverride
+     * - encoding: Allocated/set/freed by user.
+     * - decoding: unused
+     */
+ //MT:   RcOverride *rc_override;
+ //MT:   int rc_override_count;
+    /**
+     * rate control equation
+     * - encoding: Set by user
+     * - decoding: unused
+     */
+//MT:    const char *rc_eq;
+    /**
+     * maximum bitrate
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int rc_max_rate;
+    /**
+     * minimum bitrate
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int rc_min_rate;
+    /**
+     * decoder bitstream buffer size
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int rc_buffer_size;
+ //MT:   float rc_buffer_aggressivity;
+    /**
+     * qscale factor between P and I-frames
+     * If > 0 then the last p frame quantizer will be used (q= lastp_q*factor+offset).
+     * If < 0 then normal ratecontrol will be done (q= -normal_q*factor+offset).
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    float i_quant_factor;
+    /**
+     * qscale offset between P and I-frames
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    float i_quant_offset;
+    /**
+     * initial complexity for pass1 ratecontrol
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    float rc_initial_cplx;
+    /**
+     * DCT algorithm, see FF_DCT_* below
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int dct_algo;
+#define FF_DCT_AUTO    0
+#define FF_DCT_FASTINT 1
+#define FF_DCT_INT     2
+#define FF_DCT_MMX     3
+#define FF_DCT_MLIB    4
+#define FF_DCT_ALTIVEC 5
+#define FF_DCT_FAAN    6
+    /**
+     * luminance masking (0-> disabled)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   float lumi_masking;
+    /**
+     * temporary complexity masking (0-> disabled)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   float temporal_cplx_masking;
+    /**
+     * spatial complexity masking (0-> disabled)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   float spatial_cplx_masking;
+    /**
+     * p block masking (0-> disabled)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   float p_masking;
+    /**
+     * darkness masking (0-> disabled)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   float dark_masking;
+    /**
+     * IDCT algorithm, see FF_IDCT_* below.
+     * - encoding: Set by user.
+     * - decoding: Set by user.
+     */
+ //MT:   int idct_algo;
+#define FF_IDCT_AUTO          0
+#define FF_IDCT_INT           1
+#define FF_IDCT_SIMPLE        2
+#define FF_IDCT_SIMPLEMMX     3
+#define FF_IDCT_LIBMPEG2MMX   4
+#define FF_IDCT_PS2           5
+#define FF_IDCT_MLIB          6
+#define FF_IDCT_ARM           7
+#define FF_IDCT_ALTIVEC       8
+#define FF_IDCT_SH4           9
+#define FF_IDCT_SIMPLEARM     10
+#define FF_IDCT_H264          11
+#define FF_IDCT_VP3           12
+#define FF_IDCT_IPP           13
+#define FF_IDCT_XVIDMMX       14
+#define FF_IDCT_CAVS          15
+#define FF_IDCT_SIMPLEARMV5TE 16
+#define FF_IDCT_SIMPLEARMV6   17
+#define FF_IDCT_SIMPLEVIS     18
+#define FF_IDCT_WMV2          19
+#define FF_IDCT_FAAN          20
+#define FF_IDCT_EA            21
+#define FF_IDCT_SIMPLENEON    22
+#define FF_IDCT_SIMPLEALPHA   23
+    /**
+     * slice count
+     * - encoding: Set by libavcodec.
+     * - decoding: Set by user (or 0).
+     */
+//MT:    int slice_count;
+    /**
+     * slice offsets in the frame in bytes
+     * - encoding: Set/allocated by libavcodec.
+     * - decoding: Set/allocated by user (or NULL).
+     */
+ //MT:   int *slice_offset;
+    /**
+     * error concealment flags
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+//MT:    int error_concealment;
+#define FF_EC_GUESS_MVS   1
+#define FF_EC_DEBLOCK     2
+    /**
+     * dsp_mask could be add used to disable unwanted CPU features
+     * CPU features (i.e. MMX, SSE. ...)
+     *
+     * With the FORCE flag you may instead enable given CPU features.
+     * (Dangerous: Usable in case of misdetection, improper usage however will
+     * result into program crash.)
+     */
+ //MT:   unsigned dsp_mask;
+#define FF_MM_FORCE    0x80000000 /* Force usage of selected flags (OR) */
+    /* lower 16 bits - CPU features */
+#define FF_MM_MMX      0x0001 ///< standard MMX
+#define FF_MM_3DNOW    0x0004 ///< AMD 3DNOW
+#define FF_MM_MMXEXT   0x0002 ///< SSE integer functions or AMD MMX ext
+#define FF_MM_SSE      0x0008 ///< SSE functions
+#define FF_MM_SSE2     0x0010 ///< PIV SSE2 functions
+#define FF_MM_3DNOWEXT 0x0020 ///< AMD 3DNowExt
+#define FF_MM_SSE3     0x0040 ///< Prescott SSE3 functions
+#define FF_MM_SSSE3    0x0080 ///< Conroe SSSE3 functions
+#define FF_MM_IWMMXT   0x0100 ///< XScale IWMMXT
+#define FF_MM_ALTIVEC  0x0001 ///< standard AltiVec
+    /**
+     * bits per sample/pixel from the demuxer (needed for huffyuv).
+     * - encoding: Set by libavcodec.
+     * - decoding: Set by user.
+     */
+ //MT:    int bits_per_coded_sample;
+    /**
+     * prediction method (needed for huffyuv)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:    int prediction_method;
+#define FF_PRED_LEFT   0
+#define FF_PRED_PLANE  1
+#define FF_PRED_MEDIAN 2
+    /**
+     * sample aspect ratio (0 if unknown)
+     * That is the width of a pixel divided by the height of the pixel.
+     * Numerator and denominator must be relatively prime and smaller than 256 for some video standards.
+     * - encoding: Set by user.
+     * - decoding: Set by libavcodec.
+     */
+ //MT:   AVRational sample_aspect_ratio;
+    /**
+     * the picture in the bitstream
+     * - encoding: Set by libavcodec.
+     * - decoding: Set by libavcodec.
+     */
+ //MT:   AVFrame *coded_frame;
+    /**
+     * debug
+     * - encoding: Set by user.
+     * - decoding: Set by user.
+     */
+ //MT:   int debug;
+#define FF_DEBUG_PICT_INFO   1
+#define FF_DEBUG_RC          2
+#define FF_DEBUG_BITSTREAM   4
+#define FF_DEBUG_MB_TYPE     8
+#define FF_DEBUG_QP          16
+#define FF_DEBUG_MV          32
+#define FF_DEBUG_DCT_COEFF   0x00000040
+#define FF_DEBUG_SKIP        0x00000080
+#define FF_DEBUG_STARTCODE   0x00000100
+#define FF_DEBUG_PTS         0x00000200
+#define FF_DEBUG_ER          0x00000400
+#define FF_DEBUG_MMCO        0x00000800
+#define FF_DEBUG_BUGS        0x00001000
+#define FF_DEBUG_VIS_QP      0x00002000
+#define FF_DEBUG_VIS_MB_TYPE 0x00004000
+#define FF_DEBUG_BUFFERS     0x00008000
+    /**
+     * debug
+     * - encoding: Set by user.
+     * - decoding: Set by user.
+     */
+ //MT:   int debug_mv;
+#define FF_DEBUG_VIS_MV_P_FOR  0x00000001 //visualize forward predicted MVs of P frames
+#define FF_DEBUG_VIS_MV_B_FOR  0x00000002 //visualize forward predicted MVs of B frames
+#define FF_DEBUG_VIS_MV_B_BACK 0x00000004 //visualize backward predicted MVs of B frames
+    /**
+     * error
+     * - encoding: Set by libavcodec if flags&CODEC_FLAG_PSNR.
+     * - decoding: unused
+     */
+    uint64_t error[4];
+    /**
+     * minimum MB quantizer
+     * - encoding: unused
+     * - decoding: unused
+     */
+//MT:    int mb_qmin;
+    /**
+     * maximum MB quantizer
+     * - encoding: unused
+     * - decoding: unused
+     */
+//MT:    int mb_qmax;
+    /**
+     * motion estimation comparison function
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int me_cmp;
+    /**
+     * subpixel motion estimation comparison function
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int me_sub_cmp;
+    /**
+     * macroblock comparison function (not supported yet)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int mb_cmp;
+    /**
+     * interlaced DCT comparison function
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int ildct_cmp;
+#define FF_CMP_SAD    0
+#define FF_CMP_SSE    1
+#define FF_CMP_SATD   2
+#define FF_CMP_DCT    3
+#define FF_CMP_PSNR   4
+#define FF_CMP_BIT    5
+#define FF_CMP_RD     6
+#define FF_CMP_ZERO   7
+#define FF_CMP_VSAD   8
+#define FF_CMP_VSSE   9
+#define FF_CMP_NSSE   10
+#define FF_CMP_W53    11
+#define FF_CMP_W97    12
+#define FF_CMP_DCTMAX 13
+#define FF_CMP_DCT264 14
+#define FF_CMP_CHROMA 256
+    /**
+     * ME diamond size & shape
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int dia_size;
+    /**
+     * amount of previous MV predictors (2a+1 x 2a+1 square)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int last_predictor_count;
+    /**
+     * prepass for motion estimation
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int pre_me;
+    /**
+     * motion estimation prepass comparison function
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int me_pre_cmp;
+    /**
+     * ME prepass diamond size & shape
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int pre_dia_size;
+    /**
+     * subpel ME quality
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int me_subpel_quality;
+    /**
+     * callback to negotiate the pixelFormat
+     * @param fmt is the list of formats which are supported by the codec,
+     * it is terminated by -1 as 0 is a valid format, the formats are ordered by quality.
+     * The first is always the native one.
+     * @return the chosen format
+     * - encoding: unused
+     * - decoding: Set by user, if not set the native format will be chosen.
+     */
+ //MT:   enum PixelFormat (*get_format)(struct AVCodecContext *s, const enum PixelFormat * fmt);
+    /**
+     * DTG active format information (additional aspect ratio
+     * information only used in DVB MPEG-2 transport streams)
+     * 0 if not set.
+     *
+     * - encoding: unused
+     * - decoding: Set by decoder.
+     */
+ //MT:   int dtg_active_format;
+#define FF_DTG_AFD_SAME         8
+#define FF_DTG_AFD_4_3          9
+#define FF_DTG_AFD_16_9         10
+#define FF_DTG_AFD_14_9         11
+#define FF_DTG_AFD_4_3_SP_14_9  13
+#define FF_DTG_AFD_16_9_SP_14_9 14
+#define FF_DTG_AFD_SP_4_3       15
+    /**
+     * maximum motion estimation search range in subpel units
+     * If 0 then no limit.
+     *
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int me_range;
+    /**
+     * intra quantizer bias
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int intra_quant_bias;
+#define FF_DEFAULT_QUANT_BIAS 999999
+    /**
+     * inter quantizer bias
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int inter_quant_bias;
+    /**
+     * color table ID
+     * - encoding: unused
+     * - decoding: Which clrtable should be used for 8bit RGB images.
+     *             Tables have to be stored somewhere. FIXME
+     */
+ //MT:   int color_table_id;
+    /**
+     * internal_buffer count
+     * Don't touch, used by libavcodec default_get_buffer().
+     */
+ //MT:   int internal_buffer_count;
+    /**
+     * internal_buffers
+     * Don't touch, used by libavcodec default_get_buffer().
+     */
+   void *internal_buffer;
+#define FF_LAMBDA_SHIFT 7
+#define FF_LAMBDA_SCALE (1<<FF_LAMBDA_SHIFT)
+#define FF_QP2LAMBDA 118 ///< factor to convert from H.263 QP to lambda
+#define FF_LAMBDA_MAX (256*128-1)
+#define FF_QUALITY_SCALE FF_LAMBDA_SCALE //FIXME maybe remove
+    /**
+     * Global quality for codecs which cannot change it per frame.
+     * This should be proportional to MPEG-1/2/4 qscale.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int global_quality;
+#define FF_CODER_TYPE_VLC       0
+#define FF_CODER_TYPE_AC        1
+#define FF_CODER_TYPE_RAW       2
+#define FF_CODER_TYPE_RLE       3
+#define FF_CODER_TYPE_DEFLATE   4
+    /**
+     * coder type
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int coder_type;
+    /**
+     * context model
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int context_model;
+#if 0
+    /**
+     *
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+    uint8_t * (*realloc)(struct AVCodecContext *s, uint8_t *buf, int buf_size);
+#endif
+    /**
+     * slice flags
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+ //MT:   int slice_flags;
+#define SLICE_FLAG_CODED_ORDER    0x0001 ///< draw_horiz_band() is called in coded order instead of display
+#define SLICE_FLAG_ALLOW_FIELD    0x0002 ///< allow draw_horiz_band() with field slices (MPEG2 field pics)
+#define SLICE_FLAG_ALLOW_PLANE    0x0004 ///< allow draw_horiz_band() with 1 component at a time (SVQ1)
+    /**
+     * XVideo Motion Acceleration
+     * - encoding: forbidden
+     * - decoding: set by decoder
+     */
+//MT:    int xvmc_acceleration;
+    /**
+     * macroblock decision mode
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int mb_decision;
+#define FF_MB_DECISION_SIMPLE 0        ///< uses mb_cmp
+#define FF_MB_DECISION_BITS   1        ///< chooses the one which needs the fewest bits
+#define FF_MB_DECISION_RD     2        ///< rate distortion
+    /**
+     * custom intra quantization matrix
+     * - encoding: Set by user, can be NULL.
+     * - decoding: Set by libavcodec.
+     */
+ //MT:   uint16_t *intra_matrix;
+    /**
+     * custom inter quantization matrix
+     * - encoding: Set by user, can be NULL.
+     * - decoding: Set by libavcodec.
+     */
+ //MT:   uint16_t *inter_matrix;
+    /**
+     * fourcc from the AVI stream header (LSB first, so "ABCD" -> ('D'<<24) + ('C'<<16) + ('B'<<8) + 'A').
+     * This is used to work around some encoder bugs.
+     * - encoding: unused
+     * - decoding: Set by user, will be converted to uppercase by libavcodec during init.
+     */
+ //MT:   unsigned int stream_codec_tag;
+    /**
+     * scene change detection threshold
+     * 0 is default, larger means fewer detected scene changes.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int scenechange_threshold;
+    /**
+     * minimum Lagrange multipler
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int lmin;
+    /**
+     * maximum Lagrange multipler
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int lmax;
+    /**
+     * palette control structure
+     * - encoding: ??? (no palette-enabled encoder yet)
+     * - decoding: Set by user.
+     */
+ //MT:   struct AVPaletteControl *palctrl;
+    /**
+     * noise reduction strength
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int noise_reduction;
+    /**
+     * Called at the beginning of a frame to get cr buffer for it.
+     * Buffer type (size, hints) must be the same. libavcodec won't check it.
+     * libavcodec will pass previous buffer in pic, function should return
+     * same buffer or new buffer with old frame "painted" into it.
+     * If pic.data[0] == NULL must behave like get_buffer().
+     * - encoding: unused
+     * - decoding: Set by libavcodec., user can override
+     */
+//MT:    int (*reget_buffer)(struct AVCodecContext *c, AVFrame *pic);
+    /**
+     * Number of bits which should be loaded into the rc buffer before decoding starts.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int rc_initial_buffer_occupancy;
+    /**
+     *
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int inter_threshold;
+    /**
+     * CODEC_FLAG2_*
+     * - encoding: Set by user.
+     * - decoding: Set by user.
+     */
+ //MT:   int flags2;
+    /**
+     * Simulates errors in the bitstream to test error concealment.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int error_rate;
+    /**
+     * MP3 antialias algorithm, see FF_AA_* below.
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+ //MT:   int antialias_algo;
+#define FF_AA_AUTO    0
+#define FF_AA_FASTINT 1 //not implemented yet
+#define FF_AA_INT     2
+#define FF_AA_FLOAT   3
+    /**
+     * quantizer noise shaping
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int quantizer_noise_shaping;
+    /**
+     * thread count
+     * is used to decide how many independent tasks should be passed to execute()
+     * - encoding: Set by user.
+     * - decoding: Set by user.
+     */
+ //MT:   int thread_count;
+    /**
+     * The codec may call this to execute several independent things.
+     * It will return only after finishing all tasks.
+     * The user may replace this with some multithreaded implementation,
+     * the default implementation will execute the parts serially.
+     * @param count the number of things to execute
+     * - encoding: Set by libavcodec, user can override.
+     * - decoding: Set by libavcodec, user can override.
+     */
+ //MT:   int (*execute)(struct AVCodecContext *c, int (*func)(struct AVCodecContext *c2, void *arg), void *arg2, int *ret, int count, int size);
+    /**
+     * thread opaque
+     * Can be used by execute() to store some per AVCodecContext stuff.
+     * - encoding: set by execute()
+     * - decoding: set by execute()
+     */
+ //MT:   void *thread_opaque;
+    /**
+     * Motion estimation threshold below which no motion estimation is
+     * performed, but instead the user specified motion vectors are used.
+     *
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:     int me_threshold;
+    /**
+     * Macroblock threshold below which the user specified macroblock types will be used.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:    int mb_threshold;
+    /**
+     * precision of the intra DC coefficient - 8
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:     int intra_dc_precision;
+    /**
+     * noise vs. sse weight for the nsse comparsion function
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:     int nsse_weight;
+    /**
+     * Number of macroblock rows at the top which are skipped.
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+ //MT:    int skip_top;
+    /**
+     * Number of macroblock rows at the bottom which are skipped.
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+ //MT:    int skip_bottom;
+    /**
+     * profile
+     * - encoding: Set by user.
+     * - decoding: Set by libavcodec.
+     */
+ //MT:    int profile;
+#define FF_PROFILE_UNKNOWN -99
+#define FF_PROFILE_AAC_MAIN 0
+#define FF_PROFILE_AAC_LOW  1
+#define FF_PROFILE_AAC_SSR  2
+#define FF_PROFILE_AAC_LTP  3
+    /**
+     * level
+     * - encoding: Set by user.
+     * - decoding: Set by libavcodec.
+     */
+ //MT:    int level;
+#define FF_LEVEL_UNKNOWN -99
+    /**
+     * low resolution decoding, 1-> 1/2 size, 2->1/4 size
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+ //MT:    int lowres;
+    /**
+     * Bitstream width / height, may be different from width/height if lowres
+     * or other things are used.
+     * - encoding: unused
+     * - decoding: Set by user before init if known. Codec should override / dynamically change if needed.
+     */
+ //MT:   int coded_width, coded_height;
+    /**
+     * frame skip threshold
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int frame_skip_threshold;
+    /**
+     * frame skip factor
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int frame_skip_factor;
+    /**
+     * frame skip exponent
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int frame_skip_exp;
+    /**
+     * frame skip comparison function
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int frame_skip_cmp;
+    /**
+     * Border processing masking, raises the quantizer for mbs on the borders
+     * of the picture.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    float border_masking;
+    /**
+     * minimum MB lagrange multipler
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int mb_lmin;
+    /**
+     * maximum MB lagrange multipler
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int mb_lmax;
+    /**
+     *
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int me_penalty_compensation;
+    /**
+     *
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+ //MT:   enum AVDiscard skip_loop_filter;
+    /**
+     *
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+ //MT:   enum AVDiscard skip_idct;
+    /**
+     *
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+ //MT:   enum AVDiscard skip_frame;
+    /**
+     *
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int bidir_refine;
+    /**
+     *
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int brd_scale;
+    /**
+     * constant rate factor - quality-based VBR - values ~correspond to qps
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   float crf;
+    /**
+     * constant quantization parameter rate control method
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int cqp;
+    /**
+     * minimum GOP size
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int keyint_min;
+    /**
+     * number of reference frames
+     * - encoding: Set by user.
+     * - decoding: Set by lavc.
+     */
+ //MT:   int refs;
+    /**
+     * chroma qp offset from luma
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int chromaoffset;
+    /**
+     * Influences how often B-frames are used.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int bframebias;
+    /**
+     * trellis RD quantization
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int trellis;
+    /**
+     * Reduce fluctuations in qp (before curve compression).
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    float complexityblur;
+    /**
+     * in-loop deblocking filter alphac0 parameter
+     * alpha is in the range -6...6
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int deblockalpha;
+    /**
+     * in-loop deblocking filter beta parameter
+     * beta is in the range -6...6
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int deblockbeta;
+    /**
+     * macroblock subpartition sizes to consider - p8x8, p4x4, b8x8, i8x8, i4x4
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int partitions;
+#define X264_PART_I4X4 0x001  /* Analyze i4x4 */
+#define X264_PART_I8X8 0x002  /* Analyze i8x8 (requires 8x8 transform) */
+#define X264_PART_P8X8 0x010  /* Analyze p16x8, p8x16 and p8x8 */
+#define X264_PART_P4X4 0x020  /* Analyze p8x4, p4x8, p4x4 */
+#define X264_PART_B8X8 0x100  /* Analyze b16x8, b8x16 and b8x8 */
+    /**
+     * direct MV prediction mode - 0 (none), 1 (spatial), 2 (temporal), 3 (auto)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int directpred;
+    /**
+     * Audio cutoff bandwidth (0 means "automatic")
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int cutoff;
+    /**
+     * Multiplied by qscale for each frame and added to scene_change_score.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int scenechange_factor;
+    /**
+     *
+     * Note: Value depends upon the compare function used for fullpel ME.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int mv0_threshold;
+    /**
+     * Adjusts sensitivity of b_frame_strategy 1.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int b_sensitivity;
+    /**
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int compression_level;
+#define FF_COMPRESSION_DEFAULT -1
+    /**
+     * Sets whether to use LPC mode - used by FLAC encoder.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int use_lpc;
+    /**
+     * LPC coefficient precision - used by FLAC encoder
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int lpc_coeff_precision;
+    /**
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int min_prediction_order;
+    /**
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int max_prediction_order;
+    /**
+     * search method for selecting prediction order
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int prediction_order_method;
+    /**
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int min_partition_order;
+    /**
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int max_partition_order;
+    /**
+     * GOP timecode frame start number, in non drop frame format
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+  //MT:  int64_t timecode_frame_start;
+#if LIBAVCODEC_VERSION_MAJOR < 53
+    /**
+     * Decoder should decode to this many channels if it can (0 for default)
+     * - encoding: unused
+     * - decoding: Set by user.
+     * @deprecated Deprecated in favor of request_channel_layout.
+     */
+ //MT:   int request_channels;
+#endif
+    /**
+     * Percentage of dynamic range compression to be applied by the decoder.
+     * The default value is 1.0, corresponding to full compression.
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+  //MT:  float drc_scale;
+    /**
+     * opaque 64bit number (generally a PTS) that will be reordered and
+     * output in AVFrame.reordered_opaque
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+ //MT:   int64_t reordered_opaque;
+    /**
+     * Bits per sample/pixel of internal libavcodec pixel/sample format.
+     * This field is applicable only when sample_fmt is SAMPLE_FMT_S32.
+     * - encoding: set by user.
+     * - decoding: set by libavcodec.
+     */
+ //MT:   int bits_per_raw_sample;
+    /**
+     * Audio channel layout.
+     * - encoding: set by user.
+     * - decoding: set by libavcodec.
+     */
+    int64_t channel_layout;
+    /**
+     * Request decoder to use this channel layout if it can (0 for default)
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+//MT:    int64_t request_channel_layout;
+    /**
+     * Ratecontrol attempt to use, at maximum, <value> of what can be used without an underflow.
+     * - encoding: Set by user.
+     * - decoding: unused.
+     */
+ //MT:   float rc_max_available_vbv_use;
+    /**
+     * Ratecontrol attempt to use, at least, <value> times the amount needed to prevent a vbv overflow.
+     * - encoding: Set by user.
+     * - decoding: unused.
+     */
+//MT:    float rc_min_vbv_overflow_use;
+    /**
+     * Hardware accelerator in use
+     * - encoding: unused.
+     * - decoding: Set by libavcodec
+     */
+//MT:    struct AVHWAccel *hwaccel;
+    /**
+     * For some codecs, the time base is closer to the field rate than the frame rate.
+     * Most notably, H.264 and MPEG-2 specify time_base as half of frame duration
+     * if no telecine is used ...
+     *
+     * Set to time_base ticks per frame. Default 1, e.g., H.264/MPEG-2 set it to 2.
+     */
+//MT:    int ticks_per_frame;
+    /**
+     * Hardware accelerator context.
+     * For some hardware accelerators, a global context needs to be
+     * provided by the user. In that case, this holds display-dependent
+     * data FFmpeg cannot instantiate itself. Please refer to the
+     * FFmpeg HW accelerator documentation to know how to fill this
+     * is. e.g. for VA API, this is a struct vaapi_context.
+     * - encoding: unused
+     * - decoding: Set by user
+     */
+ //MT:   void *hwaccel_context;
+} AVCodecContext;
+/**
+ * AVCodec.
+ */
+typedef struct AVCodec {
+    /**
+     * Name of the codec implementation.
+     * The name is globally unique among encoders and among decoders (but an
+     * encoder and a decoder can share the same name).
+     * This is the primary way to find a codec from the user perspective.
+     */
+    const char *name;
+    enum CodecType type;
+    enum CodecID id;
+    int priv_data_size;
+    int (*init)(AVCodecContext *);
+    int (*encode)(AVCodecContext *, uint8_t *buf, int buf_size, void *data);
+    int (*close)(AVCodecContext *);
+    int (*decode)(AVCodecContext *, void *outdata, int *outdata_size,
+                  const uint8_t *buf, int buf_size);
+    /**
+     * Codec capabilities.
+     * see CODEC_CAP_*
+     */
+//MT:    int capabilities;
+//MT:    struct AVCodec *next;
+    /**
+     * Flush buffers.
+     * Will be called when seeking
+     */
+//MT:    void (*flush)(AVCodecContext *);
+ //MT:   const AVRational *supported_framerates; ///< array of supported framerates, or NULL if any, array is terminated by {0,0}
+ //MT:   const enum PixelFormat *pix_fmts;       ///< array of supported pixel formats, or NULL if unknown, array is terminated by -1
+    /**
+     * Descriptive name for the codec, meant to be more human readable than \p name.
+     * You \e should use the NULL_IF_CONFIG_SMALL() macro to define it.
+     */
+    const char *long_name;
+    const int *supported_samplerates;       ///< array of supported audio samplerates, or NULL if unknown, array is terminated by 0
+    const enum SampleFormat *sample_fmts;   ///< array of supported sample formats, or NULL if unknown, array is terminated by -1
+    const int64_t *channel_layouts;         ///< array of support channel layouts, or NULL if unknown. array is terminated by 0
+} AVCodec;
+#if 0 /* MT : DELETE THIS LINE ONLY. */
+/**
+ * AVHWAccel.
+ */
+typedef struct AVHWAccel {
+    /**
+     * Name of the hardware accelerated codec.
+     * The name is globally unique among encoders and among decoders (but an
+     * encoder and a decoder can share the same name).
+     */
+    const char *name;
+    /**
+     * Type of codec implemented by the hardware accelerator.
+     *
+     * See CODEC_TYPE_xxx
+     */
+    enum CodecType type;
+    /**
+     * Codec implemented by the hardware accelerator.
+     *
+     * See CODEC_ID_xxx
+     */
+    enum CodecID id;
+    /**
+     * Supported pixel format.
+     *
+     * Only hardware accelerated formats are supported here.
+     */
+    enum PixelFormat pix_fmt;
+    /**
+     * Hardware accelerated codec capabilities.
+     * see FF_HWACCEL_CODEC_CAP_*
+     */
+    int capabilities;
+    struct AVHWAccel *next;
+    /**
+     * Called at the beginning of each frame or field picture.
+     *
+     * Meaningful frame information (codec specific) is guaranteed to
+     * be parsed at this point. This function is mandatory.
+     *
+     * Note that \p buf can be NULL along with \p buf_size set to 0.
+     * Otherwise, this means the whole frame is available at this point.
+     *
+     * @param avctx the codec context
+     * @param buf the frame data buffer base
+     * @param buf_size the size of the frame in bytes
+     * @return zero if successful, a negative value otherwise
+     */
+    int (*start_frame)(AVCodecContext *avctx, const uint8_t *buf, uint32_t buf_size);
+    /**
+     * Callback for each slice.
+     *
+     * Meaningful slice information (codec specific) is guaranteed to
+     * be parsed at this point. This function is mandatory.
+     *
+     * @param avctx the codec context
+     * @param buf the slice data buffer base
+     * @param buf_size the size of the slice in bytes
+     * @return zero if successful, a negative value otherwise
+     */
+    int (*decode_slice)(AVCodecContext *avctx, const uint8_t *buf, uint32_t buf_size);
+    /**
+     * Called at the end of each frame or field picture.
+     *
+     * The whole picture is parsed at this point and can now be sent
+     * to the hardware accelerator. This function is mandatory.
+     *
+     * @param avctx the codec context
+     * @return zero if successful, a negative value otherwise
+     */
+    int (*end_frame)(AVCodecContext *avctx);
+    /**
+     * Size of HW accelerator private data.
+     *
+     * Private data is allocated with av_mallocz() before
+     * AVCodecContext.get_buffer() and deallocated after
+     * AVCodecContext.release_buffer().
+     */
+    int priv_data_size;
+} AVHWAccel;
+/**
+ * four components are given, that's all.
+ * the last component is alpha
+ */
+typedef struct AVPicture {
+    uint8_t *data[4];
+    int linesize[4];       ///< number of bytes per line
+} AVPicture;
+/**
+ * AVPaletteControl
+ * This structure defines a method for communicating palette changes
+ * between and demuxer and a decoder.
+ *
+ * @deprecated Use AVPacket to send palette changes instead.
+ * This is totally broken.
+ */
+#define AVPALETTE_SIZE 1024
+#define AVPALETTE_COUNT 256
+typedef struct AVPaletteControl {
+    /* Demuxer sets this to 1 to indicate the palette has changed;
+     * decoder resets to 0. */
+    int palette_changed;
+    /* 4-byte ARGB palette entries, stored in native byte order; note that
+     * the individual palette components should be on a 8-bit scale; if
+     * the palette data comes from an IBM VGA native format, the component
+     * data is probably 6 bits in size and needs to be scaled. */
+    unsigned int palette[AVPALETTE_COUNT];
+} AVPaletteControl attribute_deprecated;
+enum AVSubtitleType {
+    SUBTITLE_NONE,
+    SUBTITLE_BITMAP,                ///< A bitmap, pict will be set
+    /**
+     * Plain text, the text field must be set by the decoder and is
+     * authoritative. ass and pict fields may contain approximations.
+     */
+    SUBTITLE_TEXT,
+    /**
+     * Formatted text, the ass field must be set by the decoder and is
+     * authoritative. pict and text fields may contain approximations.
+     */
+    SUBTITLE_ASS,
+};
+typedef struct AVSubtitleRect {
+    int x;         ///< top left corner  of pict, undefined when pict is not set
+    int y;         ///< top left corner  of pict, undefined when pict is not set
+    int w;         ///< width            of pict, undefined when pict is not set
+    int h;         ///< height           of pict, undefined when pict is not set
+    int nb_colors; ///< number of colors in pict, undefined when pict is not set
+    /**
+     * data+linesize for the bitmap of this subtitle.
+     * can be set for text/ass as well once they where rendered
+     */
+    AVPicture pict;
+    enum AVSubtitleType type;
+    char *text;                     ///< 0 terminated plain UTF-8 text
+    /**
+     * 0 terminated ASS/SSA compatible event line.
+     * The pressentation of this is unaffected by the other values in this
+     * struct.
+     */
+    char *ass;
+} AVSubtitleRect;
+typedef struct AVSubtitle {
+    uint16_t format; /* 0 = graphics */
+    uint32_t start_display_time; /* relative to packet pts, in ms */
+    uint32_t end_display_time; /* relative to packet pts, in ms */
+    unsigned num_rects;
+    AVSubtitleRect **rects;
+} AVSubtitle;
+/* resample.c */
+struct ReSampleContext;
+struct AVResampleContext;
+typedef struct ReSampleContext ReSampleContext;
+#if LIBAVCODEC_VERSION_MAJOR < 53
+/**
+ * @deprecated Use av_audio_resample_init() instead.
+ */
+attribute_deprecated ReSampleContext *audio_resample_init(int output_channels, int input_channels,
+                                                          int output_rate, int input_rate);
+#endif
+/**
+ *  Initializes audio resampling context
+ *
+ * @param output_channels  number of output channels
+ * @param input_channels   number of input channels
+ * @param output_rate      output sample rate
+ * @param input_rate       input sample rate
+ * @param sample_fmt_out   requested output sample format
+ * @param sample_fmt_in    input sample format
+ * @param filter_length    length of each FIR filter in the filterbank relative to the cutoff freq
+ * @param log2_phase_count log2 of the number of entries in the polyphase filterbank
+ * @param linear           If 1 then the used FIR filter will be linearly interpolated
+                           between the 2 closest, if 0 the closest will be used
+ * @param cutoff           cutoff frequency, 1.0 corresponds to half the output sampling rate
+ * @return allocated ReSampleContext, NULL if error occured
+ */
+ReSampleContext *av_audio_resample_init(int output_channels, int input_channels,
+                                        int output_rate, int input_rate,
+                                        enum SampleFormat sample_fmt_out,
+                                        enum SampleFormat sample_fmt_in,
+                                        int filter_length, int log2_phase_count,
+                                        int linear, double cutoff);
+int audio_resample(ReSampleContext *s, short *output, short *input, int nb_samples);
+void audio_resample_close(ReSampleContext *s);
+/**
+ * Initializes an audio resampler.
+ * Note, if either rate is not an integer then simply scale both rates up so they are.
+ * @param filter_length length of each FIR filter in the filterbank relative to the cutoff freq
+ * @param log2_phase_count log2 of the number of entries in the polyphase filterbank
+ * @param linear If 1 then the used FIR filter will be linearly interpolated
+                 between the 2 closest, if 0 the closest will be used
+ * @param cutoff cutoff frequency, 1.0 corresponds to half the output sampling rate
+ */
+struct AVResampleContext *av_resample_init(int out_rate, int in_rate, int filter_length, int log2_phase_count, int linear, double cutoff);
+/**
+ * resamples.
+ * @param src an array of unconsumed samples
+ * @param consumed the number of samples of src which have been consumed are returned here
+ * @param src_size the number of unconsumed samples available
+ * @param dst_size the amount of space in samples available in dst
+ * @param update_ctx If this is 0 then the context will not be modified, that way several channels can be resampled with the same context.
+ * @return the number of samples written in dst or -1 if an error occurred
+ */
+int av_resample(struct AVResampleContext *c, short *dst, short *src, int *consumed, int src_size, int dst_size, int update_ctx);
+/**
+ * Compensates samplerate/timestamp drift. The compensation is done by changing
+ * the resampler parameters, so no audible clicks or similar distortions occur
+ * @param compensation_distance distance in output samples over which the compensation should be performed
+ * @param sample_delta number of output samples which should be output less
+ *
+ * example: av_resample_compensate(c, 10, 500)
+ * here instead of 510 samples only 500 samples would be output
+ *
+ * note, due to rounding the actual compensation might be slightly different,
+ * especially if the compensation_distance is large and the in_rate used during init is small
+ */
+void av_resample_compensate(struct AVResampleContext *c, int sample_delta, int compensation_distance);
+void av_resample_close(struct AVResampleContext *c);
+/**
+ * Allocate memory for a picture.  Call avpicture_free to free it.
+ *
+ * @param picture the picture to be filled in
+ * @param pix_fmt the format of the picture
+ * @param width the width of the picture
+ * @param height the height of the picture
+ * @return zero if successful, a negative value if not
+ */
+int avpicture_alloc(AVPicture *picture, int pix_fmt, int width, int height);
+/**
+ * Free a picture previously allocated by avpicture_alloc().
+ *
+ * @param picture the AVPicture to be freed
+ */
+void avpicture_free(AVPicture *picture);
+/**
+ * Fill in the AVPicture fields.
+ * The fields of the given AVPicture are filled in by using the 'ptr' address
+ * which points to the image data buffer. Depending on the specified picture
+ * format, one or multiple image data pointers and line sizes will be set.
+ * If a planar format is specified, several pointers will be set pointing to
+ * the different picture planes and the line sizes of the different planes
+ * will be stored in the lines_sizes array.
+ *
+ * @param picture AVPicture whose fields are to be filled in
+ * @param ptr Buffer which will contain or contains the actual image data
+ * @param pix_fmt The format in which the picture data is stored.
+ * @param width the width of the image in pixels
+ * @param height the height of the image in pixels
+ * @return size of the image data in bytes
+ */
+int avpicture_fill(AVPicture *picture, uint8_t *ptr,
+                   int pix_fmt, int width, int height);
+int avpicture_layout(const AVPicture* src, int pix_fmt, int width, int height,
+                     unsigned char *dest, int dest_size);
+/**
+ * Calculate the size in bytes that a picture of the given width and height
+ * would occupy if stored in the given picture format.
+ *
+ * @param pix_fmt the given picture format
+ * @param width the width of the image
+ * @param height the height of the image
+ * @return Image data size in bytes
+ */
+int avpicture_get_size(int pix_fmt, int width, int height);
+void avcodec_get_chroma_sub_sample(int pix_fmt, int *h_shift, int *v_shift);
+const char *avcodec_get_pix_fmt_name(int pix_fmt);
+void avcodec_set_dimensions(AVCodecContext *s, int width, int height);
+enum PixelFormat avcodec_get_pix_fmt(const char* name);
+unsigned int avcodec_pix_fmt_to_codec_tag(enum PixelFormat p);
+#define FF_LOSS_RESOLUTION  0x0001 /**< loss due to resolution change */
+#define FF_LOSS_DEPTH       0x0002 /**< loss due to color depth change */
+#define FF_LOSS_COLORSPACE  0x0004 /**< loss due to color space conversion */
+#define FF_LOSS_ALPHA       0x0008 /**< loss of alpha bits */
+#define FF_LOSS_COLORQUANT  0x0010 /**< loss due to color quantization */
+#define FF_LOSS_CHROMA      0x0020 /**< loss of chroma (e.g. RGB to gray conversion) */
+/**
+ * Computes what kind of losses will occur when converting from one specific
+ * pixel format to another.
+ * When converting from one pixel format to another, information loss may occur.
+ * For example, when converting from RGB24 to GRAY, the color information will
+ * be lost. Similarly, other losses occur when converting from some formats to
+ * other formats. These losses can involve loss of chroma, but also loss of
+ * resolution, loss of color depth, loss due to the color space conversion, loss
+ * of the alpha bits or loss due to color quantization.
+ * avcodec_get_fix_fmt_loss() informs you about the various types of losses
+ * which will occur when converting from one pixel format to another.
+ *
+ * @param[in] dst_pix_fmt destination pixel format
+ * @param[in] src_pix_fmt source pixel format
+ * @param[in] has_alpha Whether the source pixel format alpha channel is used.
+ * @return Combination of flags informing you what kind of losses will occur.
+ */
+int avcodec_get_pix_fmt_loss(int dst_pix_fmt, int src_pix_fmt,
+                             int has_alpha);
+/**
+ * Finds the best pixel format to convert to given a certain source pixel
+ * format.  When converting from one pixel format to another, information loss
+ * may occur.  For example, when converting from RGB24 to GRAY, the color
+ * information will be lost. Similarly, other losses occur when converting from
+ * some formats to other formats. avcodec_find_best_pix_fmt() searches which of
+ * the given pixel formats should be used to suffer the least amount of loss.
+ * The pixel formats from which it chooses one, are determined by the
+ * \p pix_fmt_mask parameter.
+ *
+ * @code
+ * src_pix_fmt = PIX_FMT_YUV420P;
+ * pix_fmt_mask = (1 << PIX_FMT_YUV422P) || (1 << PIX_FMT_RGB24);
+ * dst_pix_fmt = avcodec_find_best_pix_fmt(pix_fmt_mask, src_pix_fmt, alpha, &loss);
+ * @endcode
+ *
+ * @param[in] pix_fmt_mask bitmask determining which pixel format to choose from
+ * @param[in] src_pix_fmt source pixel format
+ * @param[in] has_alpha Whether the source pixel format alpha channel is used.
+ * @param[out] loss_ptr Combination of flags informing you what kind of losses will occur.
+ * @return The best pixel format to convert to or -1 if none was found.
+ */
+int avcodec_find_best_pix_fmt(int64_t pix_fmt_mask, int src_pix_fmt,
+                              int has_alpha, int *loss_ptr);
+/**
+ * Print in buf the string corresponding to the pixel format with
+ * number pix_fmt, or an header if pix_fmt is negative.
+ *
+ * @param[in] buf the buffer where to write the string
+ * @param[in] buf_size the size of buf
+ * @param[in] pix_fmt the number of the pixel format to print the corresponding info string, or
+ * a negative value to print the corresponding header.
+ * Meaningful values for obtaining a pixel format info vary from 0 to PIX_FMT_NB -1.
+ */
+void avcodec_pix_fmt_string (char *buf, int buf_size, int pix_fmt);
+#define FF_ALPHA_TRANSP       0x0001 /* image has some totally transparent pixels */
+#define FF_ALPHA_SEMI_TRANSP  0x0002 /* image has some transparent pixels */
+/**
+ * Tell if an image really has transparent alpha values.
+ * @return ored mask of FF_ALPHA_xxx constants
+ */
+int img_get_alpha_info(const AVPicture *src,
+                       int pix_fmt, int width, int height);
+/* deinterlace a picture */
+/* deinterlace - if not supported return -1 */
+int avpicture_deinterlace(AVPicture *dst, const AVPicture *src,
+                          int pix_fmt, int width, int height);
+/* external high level API */
+/**
+ * If c is NULL, returns the first registered codec,
+ * if c is non-NULL, returns the next registered codec after c,
+ * or NULL if c is the last one.
+ */
+AVCodec *av_codec_next(AVCodec *c);
+/**
+ * Returns the LIBAVCODEC_VERSION_INT constant.
+ */
+unsigned avcodec_version(void);
+/**
+ * Initializes libavcodec.
+ *
+ * @warning This function \e must be called before any other libavcodec
+ * function.
+ */
+void avcodec_init(void);
+#if LIBAVCODEC_VERSION_MAJOR < 53
+/**
+ * @deprecated Deprecated in favor of avcodec_register().
+ */
+attribute_deprecated void register_avcodec(AVCodec *codec);
+#endif
+/**
+ * Register the codec \p codec and initialize libavcodec.
+ *
+ * @see avcodec_init()
+ */
+void avcodec_register(AVCodec *codec);
+/**
+ * Finds a registered encoder with a matching codec ID.
+ *
+ * @param id CodecID of the requested encoder
+ * @return An encoder if one was found, NULL otherwise.
+ */
+AVCodec *avcodec_find_encoder(enum CodecID id);
+/**
+ * Finds a registered encoder with the specified name.
+ *
+ * @param name name of the requested encoder
+ * @return An encoder if one was found, NULL otherwise.
+ */
+AVCodec *avcodec_find_encoder_by_name(const char *name);
+/**
+ * Finds a registered decoder with a matching codec ID.
+ *
+ * @param id CodecID of the requested decoder
+ * @return A decoder if one was found, NULL otherwise.
+ */
+AVCodec *avcodec_find_decoder(enum CodecID id);
+/**
+ * Finds a registered decoder with the specified name.
+ *
+ * @param name name of the requested decoder
+ * @return A decoder if one was found, NULL otherwise.
+ */
+AVCodec *avcodec_find_decoder_by_name(const char *name);
+void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode);
+/**
+ * Sets the fields of the given AVCodecContext to default values.
+ *
+ * @param s The AVCodecContext of which the fields should be set to default values.
+ */
+void avcodec_get_context_defaults(AVCodecContext *s);
+/** THIS FUNCTION IS NOT YET PART OF THE PUBLIC API!
+ *  we WILL change its arguments and name a few times! */
+void avcodec_get_context_defaults2(AVCodecContext *s, enum CodecType);
+/**
+ * Allocates an AVCodecContext and sets its fields to default values.  The
+ * resulting struct can be deallocated by simply calling av_free().
+ *
+ * @return An AVCodecContext filled with default values or NULL on failure.
+ * @see avcodec_get_context_defaults
+ */
+AVCodecContext *avcodec_alloc_context(void);
+/** THIS FUNCTION IS NOT YET PART OF THE PUBLIC API!
+ *  we WILL change its arguments and name a few times! */
+AVCodecContext *avcodec_alloc_context2(enum CodecType);
+/**
+ * Sets the fields of the given AVFrame to default values.
+ *
+ * @param pic The AVFrame of which the fields should be set to default values.
+ */
+void avcodec_get_frame_defaults(AVFrame *pic);
+/**
+ * Allocates an AVFrame and sets its fields to default values.  The resulting
+ * struct can be deallocated by simply calling av_free().
+ *
+ * @return An AVFrame filled with default values or NULL on failure.
+ * @see avcodec_get_frame_defaults
+ */
+AVFrame *avcodec_alloc_frame(void);
+int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic);
+void avcodec_default_release_buffer(AVCodecContext *s, AVFrame *pic);
+int avcodec_default_reget_buffer(AVCodecContext *s, AVFrame *pic);
+void avcodec_align_dimensions(AVCodecContext *s, int *width, int *height);
+/**
+ * Checks if the given dimension of a picture is valid, meaning that all
+ * bytes of the picture can be addressed with a signed int.
+ *
+ * @param[in] w Width of the picture.
+ * @param[in] h Height of the picture.
+ * @return Zero if valid, a negative value if invalid.
+ */
+int avcodec_check_dimensions(void *av_log_ctx, unsigned int w, unsigned int h);
+enum PixelFormat avcodec_default_get_format(struct AVCodecContext *s, const enum PixelFormat * fmt);
+int avcodec_thread_init(AVCodecContext *s, int thread_count);
+void avcodec_thread_free(AVCodecContext *s);
+int avcodec_thread_execute(AVCodecContext *s, int (*func)(AVCodecContext *c2, void *arg2),void *arg, int *ret, int count, int size);
+int avcodec_default_execute(AVCodecContext *c, int (*func)(AVCodecContext *c2, void *arg2),void *arg, int *ret, int count, int size);
+//FIXME func typedef
+/**
+ * Initializes the AVCodecContext to use the given AVCodec. Prior to using this
+ * function the context has to be allocated.
+ *
+ * The functions avcodec_find_decoder_by_name(), avcodec_find_encoder_by_name(),
+ * avcodec_find_decoder() and avcodec_find_encoder() provide an easy way for
+ * retrieving a codec.
+ *
+ * @warning This function is not thread safe!
+ *
+ * @code
+ * avcodec_register_all();
+ * codec = avcodec_find_decoder(CODEC_ID_H264);
+ * if (!codec)
+ *     exit(1);
+ *
+ * context = avcodec_alloc_context();
+ *
+ * if (avcodec_open(context, codec) < 0)
+ *     exit(1);
+ * @endcode
+ *
+ * @param avctx The context which will be set up to use the given codec.
+ * @param codec The codec to use within the context.
+ * @return zero on success, a negative value on error
+ * @see avcodec_alloc_context, avcodec_find_decoder, avcodec_find_encoder
+ */
+int avcodec_open(AVCodecContext *avctx, AVCodec *codec);
+/**
+ * Decodes an audio frame from \p buf into \p samples.
+ * The avcodec_decode_audio2() function decodes an audio frame from the input
+ * buffer \p buf of size \p buf_size. To decode it, it makes use of the
+ * audio codec which was coupled with \p avctx using avcodec_open(). The
+ * resulting decoded frame is stored in output buffer \p samples.  If no frame
+ * could be decompressed, \p frame_size_ptr is zero. Otherwise, it is the
+ * decompressed frame size in \e bytes.
+ *
+ * @warning You \e must set \p frame_size_ptr to the allocated size of the
+ * output buffer before calling avcodec_decode_audio2().
+ *
+ * @warning The input buffer must be \c FF_INPUT_BUFFER_PADDING_SIZE larger than
+ * the actual read bytes because some optimized bitstream readers read 32 or 64
+ * bits at once and could read over the end.
+ *
+ * @warning The end of the input buffer \p buf should be set to 0 to ensure that
+ * no overreading happens for damaged MPEG streams.
+ *
+ * @note You might have to align the input buffer \p buf and output buffer \p
+ * samples. The alignment requirements depend on the CPU: On some CPUs it isn't
+ * necessary at all, on others it won't work at all if not aligned and on others
+ * it will work but it will have an impact on performance. In practice, the
+ * bitstream should have 4 byte alignment at minimum and all sample data should
+ * be 16 byte aligned unless the CPU doesn't need it (AltiVec and SSE do). If
+ * the linesize is not a multiple of 16 then there's no sense in aligning the
+ * start of the buffer to 16.
+ *
+ * @param avctx the codec context
+ * @param[out] samples the output buffer
+ * @param[in,out] frame_size_ptr the output buffer size in bytes
+ * @param[in] buf the input buffer
+ * @param[in] buf_size the input buffer size in bytes
+ * @return On error a negative value is returned, otherwise the number of bytes
+ * used or zero if no frame could be decompressed.
+ */
+int avcodec_decode_audio2(AVCodecContext *avctx, int16_t *samples,
+                         int *frame_size_ptr,
+                         const uint8_t *buf, int buf_size);
+/**
+ * Decodes a video frame from \p buf into \p picture.
+ * The avcodec_decode_video() function decodes a video frame from the input
+ * buffer \p buf of size \p buf_size. To decode it, it makes use of the
+ * video codec which was coupled with \p avctx using avcodec_open(). The
+ * resulting decoded frame is stored in \p picture.
+ *
+ * @warning The input buffer must be \c FF_INPUT_BUFFER_PADDING_SIZE larger than
+ * the actual read bytes because some optimized bitstream readers read 32 or 64
+ * bits at once and could read over the end.
+ *
+ * @warning The end of the input buffer \p buf should be set to 0 to ensure that
+ * no overreading happens for damaged MPEG streams.
+ *
+ * @note You might have to align the input buffer \p buf and output buffer \p
+ * samples. The alignment requirements depend on the CPU: on some CPUs it isn't
+ * necessary at all, on others it won't work at all if not aligned and on others
+ * it will work but it will have an impact on performance. In practice, the
+ * bitstream should have 4 byte alignment at minimum and all sample data should
+ * be 16 byte aligned unless the CPU doesn't need it (AltiVec and SSE do). If
+ * the linesize is not a multiple of 16 then there's no sense in aligning the
+ * start of the buffer to 16.
+ *
+ * @note Some codecs have a delay between input and output, these need to be
+ * feeded with buf=NULL, buf_size=0 at the end to return the remaining frames.
+ *
+ * @param avctx the codec context
+ * @param[out] picture The AVFrame in which the decoded video frame will be stored.
+ * @param[in] buf the input buffer
+ * @param[in] buf_size the size of the input buffer in bytes
+ * @param[in,out] got_picture_ptr Zero if no frame could be decompressed, otherwise, it is nonzero.
+ * @return On error a negative value is returned, otherwise the number of bytes
+ * used or zero if no frame could be decompressed.
+ */
+int avcodec_decode_video(AVCodecContext *avctx, AVFrame *picture,
+                         int *got_picture_ptr,
+                         const uint8_t *buf, int buf_size);
+/* Decode a subtitle message. Return -1 if error, otherwise return the
+ * number of bytes used. If no subtitle could be decompressed,
+ * got_sub_ptr is zero. Otherwise, the subtitle is stored in *sub. */
+int avcodec_decode_subtitle(AVCodecContext *avctx, AVSubtitle *sub,
+                            int *got_sub_ptr,
+                            const uint8_t *buf, int buf_size);
+int avcodec_parse_frame(AVCodecContext *avctx, uint8_t **pdata,
+                        int *data_size_ptr,
+                        uint8_t *buf, int buf_size);
+/**
+ * Encodes an audio frame from \p samples into \p buf.
+ * The avcodec_encode_audio() function encodes an audio frame from the input
+ * buffer \p samples. To encode it, it makes use of the audio codec which was
+ * coupled with \p avctx using avcodec_open(). The resulting encoded frame is
+ * stored in output buffer \p buf.
+ *
+ * @note The output buffer should be at least \c FF_MIN_BUFFER_SIZE bytes large.
+ *
+ * @param avctx the codec context
+ * @param[out] buf the output buffer
+ * @param[in] buf_size the output buffer size
+ * @param[in] samples the input buffer containing the samples
+ * The number of samples read from this buffer is frame_size*channels,
+ * both of which are defined in \p avctx.
+ * For PCM audio the number of samples read from \p samples is equal to
+ * \p buf_size * input_sample_size / output_sample_size.
+ * @return On error a negative value is returned, on success zero or the number
+ * of bytes used to encode the data read from the input buffer.
+ */
+int avcodec_encode_audio(AVCodecContext *avctx, uint8_t *buf, int buf_size,
+                         const short *samples);
+/**
+ * Encodes a video frame from \p pict into \p buf.
+ * The avcodec_encode_video() function encodes a video frame from the input
+ * \p pict. To encode it, it makes use of the video codec which was coupled with
+ * \p avctx using avcodec_open(). The resulting encoded bytes representing the
+ * frame are stored in the output buffer \p buf. The input picture should be
+ * stored using a specific format, namely \c avctx.pix_fmt.
+ *
+ * @param avctx the codec context
+ * @param[out] buf the output buffer for the bitstream of encoded frame
+ * @param[in] buf_size the size of the output buffer in bytes
+ * @param[in] pict the input picture to encode
+ * @return On error a negative value is returned, on success zero or the number
+ * of bytes used from the output buffer.
+ */
+int avcodec_encode_video(AVCodecContext *avctx, uint8_t *buf, int buf_size,
+                         const AVFrame *pict);
+int avcodec_encode_subtitle(AVCodecContext *avctx, uint8_t *buf, int buf_size,
+                            const AVSubtitle *sub);
+int avcodec_close(AVCodecContext *avctx);
+/**
+ * Register all the codecs, parsers and bitstream filters which were enabled at
+ * configuration time. If you do not call this function you can select exactly
+ * which formats you want to support, by using the individual registration
+ * functions.
+ *
+ * @see avcodec_register
+ * @see av_register_codec_parser
+ * @see av_register_bitstream_filter
+ */
+void avcodec_register_all(void);
+/**
+ * Flush buffers, should be called when seeking or when switching to a different stream.
+ */
+void avcodec_flush_buffers(AVCodecContext *avctx);
+void avcodec_default_free_buffers(AVCodecContext *s);
+/* misc useful functions */
+/**
+ * Returns a single letter to describe the given picture type \p pict_type.
+ *
+ * @param[in] pict_type the picture type
+ * @return A single character representing the picture type.
+ */
+char av_get_pict_type_char(int pict_type);
+/**
+ * Returns codec bits per sample.
+ *
+ * @param[in] codec_id the codec
+ * @return Number of bits per sample or zero if unknown for the given codec.
+ */
+int av_get_bits_per_sample(enum CodecID codec_id);
+/**
+ * Returns sample format bits per sample.
+ *
+ * @param[in] sample_fmt the sample format
+ * @return Number of bits per sample or zero if unknown for the given sample format.
+ */
+int av_get_bits_per_sample_format(enum SampleFormat sample_fmt);
+/* frame parsing */
+typedef struct AVCodecParserContext {
+    void *priv_data;
+    struct AVCodecParser *parser;
+    int64_t frame_offset; /* offset of the current frame */
+    int64_t cur_offset; /* current offset
+                           (incremented by each av_parser_parse()) */
+    int64_t next_frame_offset; /* offset of the next frame */
+    /* video info */
+    int pict_type; /* XXX: Put it back in AVCodecContext. */
+    /**
+     * This field is used for proper frame duration computation in lavf.
+     * It signals, how much longer the frame duration of the current frame
+     * is compared to normal frame duration.
+     *
+     * frame_duration = (1 + repeat_pict) * time_base
+     *
+     * It is used by codecs like H.264 to display telecined material.
+     */
+    int repeat_pict; /* XXX: Put it back in AVCodecContext. */
+    int64_t pts;     /* pts of the current frame */
+    int64_t dts;     /* dts of the current frame */
+    /* private data */
+    int64_t last_pts;
+    int64_t last_dts;
+    int fetch_timestamp;
+#define AV_PARSER_PTS_NB 4
+    int cur_frame_start_index;
+    int64_t cur_frame_offset[AV_PARSER_PTS_NB];
+    int64_t cur_frame_pts[AV_PARSER_PTS_NB];
+    int64_t cur_frame_dts[AV_PARSER_PTS_NB];
+    int flags;
+#define PARSER_FLAG_COMPLETE_FRAMES           0x0001
+    int64_t offset;      ///< byte offset from starting packet start
+    int64_t cur_frame_end[AV_PARSER_PTS_NB];
+    /*!
+     * Set by parser to 1 for key frames and 0 for non-key frames.
+     * It is initialized to -1, so if the parser doesn't set this flag,
+     * old-style fallback using FF_I_TYPE picture type as key frames
+     * will be used.
+     */
+    int key_frame;
+    /**
+     * Time difference in stream time base units from the pts of this
+     * packet to the point at which the output from the decoder has converged
+     * independent from the availability of previous frames. That is, the
+     * frames are virtually identical no matter if decoding started from
+     * the very first frame or from this keyframe.
+     * Is AV_NOPTS_VALUE if unknown.
+     * This field is not the display duration of the current frame.
+     *
+     * The purpose of this field is to allow seeking in streams that have no
+     * keyframes in the conventional sense. It corresponds to the
+     * recovery point SEI in H.264 and match_time_delta in NUT. It is also
+     * essential for some types of subtitle streams to ensure that all
+     * subtitles are correctly displayed after seeking.
+     */
+    int64_t convergence_duration;
+    // Timestamp generation support:
+    /**
+     * Synchronization point for start of timestamp generation.
+     *
+     * Set to >0 for sync point, 0 for no sync point and <0 for undefined
+     * (default).
+     *
+     * For example, this corresponds to presence of H.264 buffering period
+     * SEI message.
+     */
+    int dts_sync_point;
+    /**
+     * Offset of the current timestamp against last timestamp sync point in
+     * units of AVCodecContext.time_base.
+     *
+     * Set to INT_MIN when dts_sync_point unused. Otherwise, it must
+     * contain a valid timestamp offset.
+     *
+     * Note that the timestamp of sync point has usually a nonzero
+     * dts_ref_dts_delta, which refers to the previous sync point. Offset of
+     * the next frame after timestamp sync point will be usually 1.
+     *
+     * For example, this corresponds to H.264 cpb_removal_delay.
+     */
+    int dts_ref_dts_delta;
+    /**
+     * Presentation delay of current frame in units of AVCodecContext.time_base.
+     *
+     * Set to INT_MIN when dts_sync_point unused. Otherwise, it must
+     * contain valid non-negative timestamp delta (presentation time of a frame
+     * must not lie in the past).
+     *
+     * This delay represents the difference between decoding and presentation
+     * time of the frame.
+     *
+     * For example, this corresponds to H.264 dpb_output_delay.
+     */
+    int pts_dts_delta;
+    /**
+     * Position of the packet in file.
+     *
+     * Analogous to cur_frame_pts/dts
+     */
+    int64_t cur_frame_pos[AV_PARSER_PTS_NB];
+    /**
+     * Byte position of currently parsed frame in stream.
+     */
+    int64_t pos;
+    /**
+     * Previous frame byte position.
+     */
+    int64_t last_pos;
+} AVCodecParserContext;
+typedef struct AVCodecParser {
+    int codec_ids[5]; /* several codec IDs are permitted */
+    int priv_data_size;
+    int (*parser_init)(AVCodecParserContext *s);
+    int (*parser_parse)(AVCodecParserContext *s,
+                        AVCodecContext *avctx,
+                        const uint8_t **poutbuf, int *poutbuf_size,
+                        const uint8_t *buf, int buf_size);
+    void (*parser_close)(AVCodecParserContext *s);
+    int (*split)(AVCodecContext *avctx, const uint8_t *buf, int buf_size);
+    struct AVCodecParser *next;
+} AVCodecParser;
+AVCodecParser *av_parser_next(AVCodecParser *c);
+void av_register_codec_parser(AVCodecParser *parser);
+AVCodecParserContext *av_parser_init(int codec_id);
+attribute_deprecated
+int av_parser_parse(AVCodecParserContext *s,
+                    AVCodecContext *avctx,
+                    uint8_t **poutbuf, int *poutbuf_size,
+                    const uint8_t *buf, int buf_size,
+                    int64_t pts, int64_t dts);
+/**
+ * Parse a packet.
+ *
+ * @param s             parser context.
+ * @param avctx         codec context.
+ * @param poutbuf       set to pointer to parsed buffer or NULL if not yet finished.
+ * @param poutbuf_size  set to size of parsed buffer or zero if not yet finished.
+ * @param buf           input buffer.
+ * @param buf_size      input length, to signal EOF, this should be 0 (so that the last frame can be output).
+ * @param pts           input presentation timestamp.
+ * @param dts           input decoding timestamp.
+ * @param pos           input byte position in stream.
+ * @return the number of bytes of the input bitstream used.
+ *
+ * Example:
+ * @code
+ *   while(in_len){
+ *       len = av_parser_parse2(myparser, AVCodecContext, &data, &size,
+ *                                        in_data, in_len,
+ *                                        pts, dts, pos);
+ *       in_data += len;
+ *       in_len  -= len;
+ *
+ *       if(size)
+ *          decode_frame(data, size);
+ *   }
+ * @endcode
+ */
+int av_parser_parse2(AVCodecParserContext *s,
+                     AVCodecContext *avctx,
+                     uint8_t **poutbuf, int *poutbuf_size,
+                     const uint8_t *buf, int buf_size,
+                     int64_t pts, int64_t dts,
+                     int64_t pos);
+int av_parser_change(AVCodecParserContext *s,
+                     AVCodecContext *avctx,
+                     uint8_t **poutbuf, int *poutbuf_size,
+                     const uint8_t *buf, int buf_size, int keyframe);
+void av_parser_close(AVCodecParserContext *s);
+typedef struct AVBitStreamFilterContext {
+    void *priv_data;
+    struct AVBitStreamFilter *filter;
+    AVCodecParserContext *parser;
+    struct AVBitStreamFilterContext *next;
+} AVBitStreamFilterContext;
+typedef struct AVBitStreamFilter {
+    const char *name;
+    int priv_data_size;
+    int (*filter)(AVBitStreamFilterContext *bsfc,
+                  AVCodecContext *avctx, const char *args,
+                  uint8_t **poutbuf, int *poutbuf_size,
+                  const uint8_t *buf, int buf_size, int keyframe);
+    void (*close)(AVBitStreamFilterContext *bsfc);
+    struct AVBitStreamFilter *next;
+} AVBitStreamFilter;
+void av_register_bitstream_filter(AVBitStreamFilter *bsf);
+AVBitStreamFilterContext *av_bitstream_filter_init(const char *name);
+int av_bitstream_filter_filter(AVBitStreamFilterContext *bsfc,
+                               AVCodecContext *avctx, const char *args,
+                               uint8_t **poutbuf, int *poutbuf_size,
+                               const uint8_t *buf, int buf_size, int keyframe);
+void av_bitstream_filter_close(AVBitStreamFilterContext *bsf);
+AVBitStreamFilter *av_bitstream_filter_next(AVBitStreamFilter *f);
+/* memory */
+/**
+ * Reallocates the given block if it is not large enough, otherwise it
+ * does nothing.
+ *
+ * @see av_realloc
+ */
+void *av_fast_realloc(void *ptr, unsigned int *size, unsigned int min_size);
+/**
+ * Copy image 'src' to 'dst'.
+ */
+void av_picture_copy(AVPicture *dst, const AVPicture *src,
+              int pix_fmt, int width, int height);
+/**
+ * Crop image top and left side.
+ */
+int av_picture_crop(AVPicture *dst, const AVPicture *src,
+             int pix_fmt, int top_band, int left_band);
+/**
+ * Pad image.
+ */
+int av_picture_pad(AVPicture *dst, const AVPicture *src, int height, int width, int pix_fmt,
+            int padtop, int padbottom, int padleft, int padright, int *color);
+unsigned int av_xiphlacing(unsigned char *s, unsigned int v);
+/**
+ * Parses \p str and put in \p width_ptr and \p height_ptr the detected values.
+ *
+ * @return 0 in case of a successful parsing, a negative value otherwise
+ * @param[in] str the string to parse: it has to be a string in the format
+ * <width>x<height> or a valid video frame size abbreviation.
+ * @param[in,out] width_ptr pointer to the variable which will contain the detected
+ * frame width value
+ * @param[in,out] height_ptr pointer to the variable which will contain the detected
+ * frame height value
+ */
+int av_parse_video_frame_size(int *width_ptr, int *height_ptr, const char *str);
+/**
+ * Parses \p str and put in \p frame_rate the detected values.
+ *
+ * @return 0 in case of a successful parsing, a negative value otherwise
+ * @param[in] str the string to parse: it has to be a string in the format
+ * <frame_rate_num>/<frame_rate_den>, a float number or a valid video rate abbreviation
+ * @param[in,out] frame_rate pointer to the AVRational which will contain the detected
+ * frame rate
+ */
+int av_parse_video_frame_rate(AVRational *frame_rate, const char *str);
+/* error handling */
+#if EINVAL > 0
+#define AVERROR(e) (-(e)) /**< Returns a negative error code from a POSIX error code, to return from library functions. */
+#define AVUNERROR(e) (-(e)) /**< Returns a POSIX error code from a library function error return value. */
+#else
+/* Some platforms have E* and errno already negated. */
+#define AVERROR(e) (e)
+#define AVUNERROR(e) (e)
+#endif
+#define AVERROR_UNKNOWN     AVERROR(EINVAL)  /**< unknown error */
+#define AVERROR_IO          AVERROR(EIO)     /**< I/O error */
+#define AVERROR_NUMEXPECTED AVERROR(EDOM)    /**< Number syntax expected in filename. */
+#define AVERROR_INVALIDDATA AVERROR(EINVAL)  /**< invalid data found */
+#define AVERROR_NOMEM       AVERROR(ENOMEM)  /**< not enough memory */
+#define AVERROR_NOFMT       AVERROR(EILSEQ)  /**< unknown format */
+#define AVERROR_NOTSUPP     AVERROR(ENOSYS)  /**< Operation not supported. */
+#define AVERROR_NOENT       AVERROR(ENOENT)  /**< No such file or directory. */
+#define AVERROR_EOF         AVERROR(EPIPE)   /**< End of file. */
+#define AVERROR_PATCHWELCOME    -MKTAG('P','A','W','E') /**< Not yet implemented in FFmpeg. Patches welcome. */
+/**
+ * Registers the hardware accelerator \p hwaccel.
+ */
+void av_register_hwaccel(AVHWAccel *hwaccel);
+/**
+ * If hwaccel is NULL, returns the first registered hardware accelerator,
+ * if hwaccel is non-NULL, returns the next registered hardware accelerator
+ * after hwaccel, or NULL if hwaccel is the last one.
+ */
+AVHWAccel *av_hwaccel_next(AVHWAccel *hwaccel);
+#endif /* MT : DELETE THIS LINE ONLY. */
+#endif /* AVCODEC_AVCODEC_H */
diff --git a/apps/codecs/libatrac/bitstream.c b/apps/codecs/libatrac/bitstream.c
new file mode 100644
index 0000000000..1375134b21
--- /dev/null
+++ b/apps/codecs/libatrac/bitstream.c
@@ -0,0 +1,276 @@
+/*
+ * Common bit i/o utils
+ * Copyright (c) 2000, 2001 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * alternative bitstream reader & writer by Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "bitstream.h"
+#ifdef ROCKBOX
+#undef DEBUGF
+#define DEBUGF(...)
+#endif
+const uint8_t ff_log2_run[32]={
+ 0, 0, 0, 0, 1, 1, 1, 1,
+ 2, 2, 2, 2, 3, 3, 3, 3,
+ 4, 4, 5, 5, 6, 6, 7, 7,
+ 8, 9,10,11,12,13,14,15
+};
+/**
+ * Same as av_mallocz_static(), but does a realloc.
+ *
+ * @param[in] ptr The block of memory to reallocate.
+ * @param[in] size The requested size.
+ * @return Block of memory of requested size.
+ * @deprecated. Code which uses ff_realloc_static is broken/misdesigned
+ * and should correctly use static arrays
+ */
+void ff_put_string(PutBitContext * pbc, const char *s, int put_zero)
+{
+    while(*s){
+        put_bits(pbc, 8, *s);
+        s++;
+    }
+    if(put_zero)
+        put_bits(pbc, 8, 0);
+}
+/* VLC decoding */
+//#define DEBUG_VLC
+#define GET_DATA(v, table, i, wrap, size) \
+{\
+    const uint8_t *ptr = (const uint8_t *)table + i * wrap;\
+    switch(size) {\
+    case 1:\
+        v = *(const uint8_t *)ptr;\
+        break;\
+    case 2:\
+        v = *(const uint16_t *)ptr;\
+        break;\
+    default:\
+        v = *(const uint32_t *)ptr;\
+        break;\
+    }\
+}
+static int alloc_table(VLC *vlc, int size, int use_static)
+{
+    int index;
+    index = vlc->table_size;
+    vlc->table_size += size;
+    if (vlc->table_size > vlc->table_allocated) {
+        if(use_static>1){
+            DEBUGF("init_vlc() used with too little memory : table_size > allocated_memory\n");
+        }
+        if (!vlc->table)
+            return -1;
+    }
+    return index;
+}
+static int build_table(VLC *vlc, int table_nb_bits,
+                       int nb_codes,
+                       const void *bits, int bits_wrap, int bits_size,
+                       const void *codes, int codes_wrap, int codes_size,
+                       const void *symbols, int symbols_wrap, int symbols_size,
+                       uint32_t code_prefix, int n_prefix, int flags)
+{
+    int i, j, k, n, table_size, table_index, nb, n1, index, code_prefix2, symbol;
+    uint32_t code;
+    VLC_TYPE (*table)[2];
+    table_size = 1 << table_nb_bits;
+    table_index = alloc_table(vlc, table_size, flags & (INIT_VLC_USE_STATIC|INIT_VLC_USE_NEW_STATIC));
+#ifdef DEBUG_VLC
+    DEBUGF("new table index=%d size=%d code_prefix=%x n=%d\n",
+           table_index, table_size, code_prefix, n_prefix);
+#endif
+    if (table_index < 0)
+        return -1;
+    table = &vlc->table[table_index];
+    for(i=0;i<table_size;i++) {
+        table[i][1] = 0; //bits
+        table[i][0] = -1; //codes
+    }
+    /* first pass: map codes and compute auxillary table sizes */
+    for(i=0;i<nb_codes;i++) {
+        GET_DATA(n, bits, i, bits_wrap, bits_size);
+        GET_DATA(code, codes, i, codes_wrap, codes_size);
+        /* we accept tables with holes */
+        if (n <= 0)
+            continue;
+        if (!symbols)
+            symbol = i;
+        else
+            GET_DATA(symbol, symbols, i, symbols_wrap, symbols_size);
+#if defined(DEBUG_VLC) && 0
+        DEBUGF("i=%d n=%d code=0x%x\n", i, n, code);
+#endif
+        /* if code matches the prefix, it is in the table */
+        n -= n_prefix;
+        if(flags & INIT_VLC_LE)
+            code_prefix2= code & (n_prefix>=32 ? (int)0xffffffff : (1 << n_prefix)-1);
+        else
+            code_prefix2= code >> n;
+        if (n > 0 && code_prefix2 == (int)code_prefix) {
+            if (n <= table_nb_bits) {
+                /* no need to add another table */
+                j = (code << (table_nb_bits - n)) & (table_size - 1);
+                nb = 1 << (table_nb_bits - n);
+                for(k=0;k<nb;k++) {
+                    if(flags & INIT_VLC_LE)
+                        j = (code >> n_prefix) + (k<<n);
+#ifdef DEBUG_VLC
+                    DEBUGF("%4x: code=%d n=%d\n",
+                           j, i, n);
+#endif
+                    if (table[j][1] /*bits*/ != 0) {
+                        DEBUGF("incorrect codes\n");
+                        return -1;
+                    }
+                    table[j][1] = n; //bits
+                    table[j][0] = symbol;
+                    j++;
+                }
+            } else {
+                n -= table_nb_bits;
+                j = (code >> ((flags & INIT_VLC_LE) ? n_prefix : n)) & ((1 << table_nb_bits) - 1);
+#ifdef DEBUG_VLC
+                DEBUGF("%4x: n=%d (subtable)\n",
+                       j, n);
+#endif
+                /* compute table size */
+                n1 = -table[j][1]; //bits
+                if (n > n1)
+                    n1 = n;
+                table[j][1] = -n1; //bits
+            }
+        }
+    }
+    /* second pass : fill auxillary tables recursively */
+    for(i=0;i<table_size;i++) {
+        n = table[i][1]; //bits
+        if (n < 0) {
+            n = -n;
+            if (n > table_nb_bits) {
+                n = table_nb_bits;
+                table[i][1] = -n; //bits
+            }
+            index = build_table(vlc, n, nb_codes,
+                                bits, bits_wrap, bits_size,
+                                codes, codes_wrap, codes_size,
+                                symbols, symbols_wrap, symbols_size,
+                                (flags & INIT_VLC_LE) ? (code_prefix | (i << n_prefix)) : ((code_prefix << table_nb_bits) | i),
+                                n_prefix + table_nb_bits, flags);
+            if (index < 0)
+                return -1;
+            /* note: realloc has been done, so reload tables */
+            table = &vlc->table[table_index];
+            table[i][0] = index; //code
+        }
+    }
+    return table_index;
+}
+/* Build VLC decoding tables suitable for use with get_vlc().
+   'nb_bits' set thee decoding table size (2^nb_bits) entries. The
+   bigger it is, the faster is the decoding. But it should not be too
+   big to save memory and L1 cache. '9' is a good compromise.
+   'nb_codes' : number of vlcs codes
+   'bits' : table which gives the size (in bits) of each vlc code.
+   'codes' : table which gives the bit pattern of of each vlc code.
+   'symbols' : table which gives the values to be returned from get_vlc().
+   'xxx_wrap' : give the number of bytes between each entry of the
+   'bits' or 'codes' tables.
+   'xxx_size' : gives the number of bytes of each entry of the 'bits'
+   or 'codes' tables.
+   'wrap' and 'size' allows to use any memory configuration and types
+   (byte/word/long) to store the 'bits', 'codes', and 'symbols' tables.
+   'use_static' should be set to 1 for tables, which should be freed
+   with av_free_static(), 0 if free_vlc() will be used.
+*/
+int init_vlc_sparse(VLC *vlc, int nb_bits, int nb_codes,
+             const void *bits, int bits_wrap, int bits_size,
+             const void *codes, int codes_wrap, int codes_size,
+             const void *symbols, int symbols_wrap, int symbols_size,
+             int flags)
+{
+    vlc->bits = nb_bits;
+    if(flags & INIT_VLC_USE_NEW_STATIC){
+        if(vlc->table_size && vlc->table_size == vlc->table_allocated){
+            return 0;
+        }else if(vlc->table_size){
+            return -1; // fatal error, we are called on a partially initialized table
+        }
+    }else if(!(flags & INIT_VLC_USE_STATIC)) {
+        vlc->table = NULL;
+        vlc->table_allocated = 0;
+        vlc->table_size = 0;
+    } else {
+        /* Static tables are initially always NULL, return
+           if vlc->table != NULL to avoid double allocation */
+        if(vlc->table)
+            return 0;
+    }
+#ifdef DEBUG_VLC
+    DEBUGF("build table nb_codes=%d\n", nb_codes);
+#endif
+    if (build_table(vlc, nb_bits, nb_codes,
+                    bits, bits_wrap, bits_size,
+                    codes, codes_wrap, codes_size,
+                    symbols, symbols_wrap, symbols_size,
+                    0, 0, flags) < 0) {
+        //free(&vlc->table);
+        return -1;
+    }
+    /* Changed the following condition to be true if table_size > table_allocated. *
+     * This would be more sensible for static tables since we want warnings for    *
+     * memory shortages only.                                                      */
+#ifdef TEST
+    if((flags & INIT_VLC_USE_NEW_STATIC) && vlc->table_size > vlc->table_allocated)
+        DEBUGF("needed %d had %d\n", vlc->table_size, vlc->table_allocated);
+#endif
+    return 0;
+}
diff --git a/apps/codecs/libatrac/bitstream.h b/apps/codecs/libatrac/bitstream.h
new file mode 100644
index 0000000000..b4c3136be3
--- /dev/null
+++ b/apps/codecs/libatrac/bitstream.h
@@ -0,0 +1,963 @@
+/*
+ * copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef BITSTREAM_H
+#define BITSTREAM_H
+#include <inttypes.h>
+#include <stdlib.h>
+//#include <assert.h>
+#include <string.h>
+#include <stdio.h>
+#include "bswap.h"
+/* The following 2 defines are taken from libavutil/intreadwrite.h */
+#define AV_RB32(x)  ((((const uint8_t*)(x))[0] << 24) | \
+                     (((const uint8_t*)(x))[1] << 16) | \
+                     (((const uint8_t*)(x))[2] <<  8) | \
+                      ((const uint8_t*)(x))[3])
+#define AV_WB32(p, d) do { \
+                    ((uint8_t*)(p))[3] = (d); \
+                    ((uint8_t*)(p))[2] = (d)>>8; \
+                    ((uint8_t*)(p))[1] = (d)>>16; \
+                    ((uint8_t*)(p))[0] = (d)>>24; } while(0)
+#if defined(ALT_BITSTREAM_READER_LE) && !defined(ALT_BITSTREAM_READER)
+#   define ALT_BITSTREAM_READER
+#endif
+//#define ALT_BITSTREAM_WRITER
+//#define ALIGNED_BITSTREAM_WRITER
+#if !defined(LIBMPEG2_BITSTREAM_READER) && !defined(A32_BITSTREAM_READER) && !defined(ALT_BITSTREAM_READER)
+#   if defined(ARCH_ARM)
+#       define A32_BITSTREAM_READER
+#   else
+#       define ALT_BITSTREAM_READER
+//#define LIBMPEG2_BITSTREAM_READER
+//#define A32_BITSTREAM_READER
+#   endif
+#endif
+extern const uint8_t ff_reverse[256];
+#if defined(ARCH_X86)
+// avoid +32 for shift optimization (gcc should do that ...)
+static inline  int32_t NEG_SSR32( int32_t a, int8_t s){
+    __asm__ ("sarl %1, %0\n\t"
+         : "+r" (a)
+         : "ic" ((uint8_t)(-s))
+    );
+    return a;
+}
+static inline uint32_t NEG_USR32(uint32_t a, int8_t s){
+    __asm__ ("shrl %1, %0\n\t"
+         : "+r" (a)
+         : "ic" ((uint8_t)(-s))
+    );
+    return a;
+}
+#else
+#    define NEG_SSR32(a,s) ((( int32_t)(a))>>(32-(s)))
+#    define NEG_USR32(a,s) (((uint32_t)(a))>>(32-(s)))
+#endif
+/* bit output */
+/* buf and buf_end must be present and used by every alternative writer. */
+typedef struct PutBitContext {
+#ifdef ALT_BITSTREAM_WRITER
+    uint8_t *buf, *buf_end;
+    int index;
+#else
+    uint32_t bit_buf;
+    int bit_left;
+    uint8_t *buf, *buf_ptr, *buf_end;
+#endif
+    int size_in_bits;
+} PutBitContext;
+static inline void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size)
+{
+    if(buffer_size < 0) {
+        buffer_size = 0;
+        buffer = NULL;
+    }
+    s->size_in_bits= 8*buffer_size;
+    s->buf = buffer;
+    s->buf_end = s->buf + buffer_size;
+#ifdef ALT_BITSTREAM_WRITER
+    s->index=0;
+    ((uint32_t*)(s->buf))[0]=0;
+//    memset(buffer, 0, buffer_size);
+#else
+    s->buf_ptr = s->buf;
+    s->bit_left=32;
+    s->bit_buf=0;
+#endif
+}
+/* return the number of bits output */
+static inline int put_bits_count(PutBitContext *s)
+{
+#ifdef ALT_BITSTREAM_WRITER
+    return s->index;
+#else
+    return (s->buf_ptr - s->buf) * 8 + 32 - s->bit_left;
+#endif
+}
+/* pad the end of the output stream with zeros */
+static inline void flush_put_bits(PutBitContext *s)
+{
+#ifdef ALT_BITSTREAM_WRITER
+    align_put_bits(s);
+#else
+#ifndef BITSTREAM_WRITER_LE
+    s->bit_buf<<= s->bit_left;
+#endif
+    while (s->bit_left < 32) {
+        /* XXX: should test end of buffer */
+#ifdef BITSTREAM_WRITER_LE
+        *s->buf_ptr++=s->bit_buf;
+        s->bit_buf>>=8;
+#else
+        *s->buf_ptr++=s->bit_buf >> 24;
+        s->bit_buf<<=8;
+#endif
+        s->bit_left+=8;
+    }
+    s->bit_left=32;
+    s->bit_buf=0;
+#endif
+}
+void align_put_bits(PutBitContext *s);
+void ff_put_string(PutBitContext * pbc, const char *s, int put_zero);
+void ff_copy_bits(PutBitContext *pb, const uint8_t *src, int length);
+/* bit input */
+/* buffer, buffer_end and size_in_bits must be present and used by every reader */
+typedef struct GetBitContext {
+    const uint8_t *buffer, *buffer_end;
+#ifdef ALT_BITSTREAM_READER
+    int index;
+#elif defined LIBMPEG2_BITSTREAM_READER
+    uint8_t *buffer_ptr;
+    uint32_t cache;
+    int bit_count;
+#elif defined A32_BITSTREAM_READER
+    uint32_t *buffer_ptr;
+    uint32_t cache0;
+    uint32_t cache1;
+    int bit_count;
+#endif
+    int size_in_bits;
+} GetBitContext;
+#define VLC_TYPE int16_t
+typedef struct VLC {
+    int bits;
+    VLC_TYPE (*table)[2]; ///< code, bits
+    int table_size, table_allocated;
+} VLC;
+typedef struct RL_VLC_ELEM {
+    int16_t level;
+    int8_t len;
+    uint8_t run;
+} RL_VLC_ELEM;
+#ifndef ALT_BITSTREAM_WRITER
+static inline void put_bits(PutBitContext *s, int n, unsigned int value)
+{
+    unsigned int bit_buf;
+    int bit_left;
+    //    printf("put_bits=%d %x\n", n, value);
+    //assert(n == 32 || value < (1U << n));
+    bit_buf = s->bit_buf;
+    bit_left = s->bit_left;
+    //    printf("n=%d value=%x cnt=%d buf=%x\n", n, value, bit_cnt, bit_buf);
+    /* XXX: optimize */
+#ifdef BITSTREAM_WRITER_LE
+    bit_buf |= value << (32 - bit_left);
+    if (n >= bit_left) {
+#if !HAVE_FAST_UNALIGNED
+        if (3 & (intptr_t) s->buf_ptr) {
+            AV_WL32(s->buf_ptr, bit_buf);
+        } else
+#endif
+        *(uint32_t *)s->buf_ptr = le2me_32(bit_buf);
+        s->buf_ptr+=4;
+        bit_buf = (bit_left==32)?0:value >> bit_left;
+        bit_left+=32;
+    }
+    bit_left-=n;
+#else
+    if (n < bit_left) {
+        bit_buf = (bit_buf<<n) | value;
+        bit_left-=n;
+    } else {
+        bit_buf<<=bit_left;
+        bit_buf |= value >> (n - bit_left);
+#if !defined(HAVE_FAST_UNALIGNED)
+        if (3 & (intptr_t) s->buf_ptr) {
+            AV_WB32(s->buf_ptr, bit_buf);
+        } else
+#endif
+        *(uint32_t *)s->buf_ptr = be2me_32(bit_buf);
+        //printf("bitbuf = %08x\n", bit_buf);
+        s->buf_ptr+=4;
+        bit_left+=32 - n;
+        bit_buf = value;
+    }
+#endif
+    s->bit_buf = bit_buf;
+    s->bit_left = bit_left;
+}
+#endif
+#ifdef ALT_BITSTREAM_WRITER
+static inline void put_bits(PutBitContext *s, int n, unsigned int value)
+{
+#    ifdef ALIGNED_BITSTREAM_WRITER
+#        if ARCH_X86
+    __asm__ volatile(
+        "movl %0, %%ecx                 \n\t"
+        "xorl %%eax, %%eax              \n\t"
+        "shrdl %%cl, %1, %%eax          \n\t"
+        "shrl %%cl, %1                  \n\t"
+        "movl %0, %%ecx                 \n\t"
+        "shrl $3, %%ecx                 \n\t"
+        "andl $0xFFFFFFFC, %%ecx        \n\t"
+        "bswapl %1                      \n\t"
+        "orl %1, (%2, %%ecx)            \n\t"
+        "bswapl %%eax                   \n\t"
+        "addl %3, %0                    \n\t"
+        "movl %%eax, 4(%2, %%ecx)       \n\t"
+        : "=&r" (s->index), "=&r" (value)
+        : "r" (s->buf), "r" (n), "0" (s->index), "1" (value<<(-n))
+        : "%eax", "%ecx"
+    );
+#        else
+    int index= s->index;
+    uint32_t *ptr= ((uint32_t *)s->buf)+(index>>5);
+    value<<= 32-n;
+    ptr[0] |= be2me_32(value>>(index&31));
+    ptr[1]  = be2me_32(value<<(32-(index&31)));
+//if(n>24) printf("%d %d\n", n, value);
+    index+= n;
+    s->index= index;
+#        endif
+#    else //ALIGNED_BITSTREAM_WRITER
+#        if ARCH_X86
+    __asm__ volatile(
+        "movl $7, %%ecx                 \n\t"
+        "andl %0, %%ecx                 \n\t"
+        "addl %3, %%ecx                 \n\t"
+        "negl %%ecx                     \n\t"
+        "shll %%cl, %1                  \n\t"
+        "bswapl %1                      \n\t"
+        "movl %0, %%ecx                 \n\t"
+        "shrl $3, %%ecx                 \n\t"
+        "orl %1, (%%ecx, %2)            \n\t"
+        "addl %3, %0                    \n\t"
+        "movl $0, 4(%%ecx, %2)          \n\t"
+        : "=&r" (s->index), "=&r" (value)
+        : "r" (s->buf), "r" (n), "0" (s->index), "1" (value)
+        : "%ecx"
+    );
+#        else
+    int index= s->index;
+    uint32_t *ptr= (uint32_t*)(((uint8_t *)s->buf)+(index>>3));
+    ptr[0] |= be2me_32(value<<(32-n-(index&7) ));
+    ptr[1] = 0;
+//if(n>24) printf("%d %d\n", n, value);
+    index+= n;
+    s->index= index;
+#        endif
+#    endif //!ALIGNED_BITSTREAM_WRITER
+}
+#endif
+static inline void put_sbits(PutBitContext *pb, int bits, int32_t val)
+{
+    //assert(bits >= 0 && bits <= 31);
+    put_bits(pb, bits, val & ((1<<bits)-1));
+}
+static inline uint8_t* pbBufPtr(PutBitContext *s)
+{
+#ifdef ALT_BITSTREAM_WRITER
+        return s->buf + (s->index>>3);
+#else
+        return s->buf_ptr;
+#endif
+}
+/**
+ *
+ * PutBitContext must be flushed & aligned to a byte boundary before calling this.
+ */
+static inline void skip_put_bytes(PutBitContext *s, int n){
+        //assert((put_bits_count(s)&7)==0);
+#ifdef ALT_BITSTREAM_WRITER
+        FIXME may need some cleaning of the buffer
+        s->index += n<<3;
+#else
+        //assert(s->bit_left==32);
+        s->buf_ptr += n;
+#endif
+}
+/**
+ * Skips the given number of bits.
+ * Must only be used if the actual values in the bitstream do not matter.
+ */
+static inline void skip_put_bits(PutBitContext *s, int n){
+#ifdef ALT_BITSTREAM_WRITER
+    s->index += n;
+#else
+    s->bit_left -= n;
+    s->buf_ptr-= s->bit_left>>5;
+    s->bit_left &= 31;
+#endif
+}
+/**
+ * Changes the end of the buffer.
+ */
+static inline void set_put_bits_buffer_size(PutBitContext *s, int size){
+    s->buf_end= s->buf + size;
+}
+/* Bitstream reader API docs:
+name
+    arbitrary name which is used as prefix for the internal variables
+gb
+    getbitcontext
+OPEN_READER(name, gb)
+    loads gb into local variables
+CLOSE_READER(name, gb)
+    stores local vars in gb
+UPDATE_CACHE(name, gb)
+    refills the internal cache from the bitstream
+    after this call at least MIN_CACHE_BITS will be available,
+GET_CACHE(name, gb)
+    will output the contents of the internal cache, next bit is MSB of 32 or 64 bit (FIXME 64bit)
+SHOW_UBITS(name, gb, num)
+    will return the next num bits
+SHOW_SBITS(name, gb, num)
+    will return the next num bits and do sign extension
+SKIP_BITS(name, gb, num)
+    will skip over the next num bits
+    note, this is equivalent to SKIP_CACHE; SKIP_COUNTER
+SKIP_CACHE(name, gb, num)
+    will remove the next num bits from the cache (note SKIP_COUNTER MUST be called before UPDATE_CACHE / CLOSE_READER)
+SKIP_COUNTER(name, gb, num)
+    will increment the internal bit counter (see SKIP_CACHE & SKIP_BITS)
+LAST_SKIP_CACHE(name, gb, num)
+    will remove the next num bits from the cache if it is needed for UPDATE_CACHE otherwise it will do nothing
+LAST_SKIP_BITS(name, gb, num)
+    is equivalent to SKIP_LAST_CACHE; SKIP_COUNTER
+for examples see get_bits, show_bits, skip_bits, get_vlc
+*/
+#ifdef ALT_BITSTREAM_READER
+#   define MIN_CACHE_BITS 25
+#   define OPEN_READER(name, gb)\
+        int name##_index= (gb)->index;\
+        int name##_cache= 0;\
+#   define CLOSE_READER(name, gb)\
+        (gb)->index= name##_index;\
+# ifdef ALT_BITSTREAM_READER_LE
+#   define UPDATE_CACHE(name, gb)\
+        name##_cache= AV_RL32( ((const uint8_t *)(gb)->buffer)+(name##_index>>3) ) >> (name##_index&0x07);\
+#   define SKIP_CACHE(name, gb, num)\
+        name##_cache >>= (num);
+# else
+#   define UPDATE_CACHE(name, gb)\
+        name##_cache= AV_RB32( ((const uint8_t *)(gb)->buffer)+(name##_index>>3) ) << (name##_index&0x07);\
+#   define SKIP_CACHE(name, gb, num)\
+        name##_cache <<= (num);
+# endif
+// FIXME name?
+#   define SKIP_COUNTER(name, gb, num)\
+        name##_index += (num);\
+#   define SKIP_BITS(name, gb, num)\
+        {\
+            SKIP_CACHE(name, gb, num)\
+            SKIP_COUNTER(name, gb, num)\
+        }\
+#   define LAST_SKIP_BITS(name, gb, num) SKIP_COUNTER(name, gb, num)
+#   define LAST_SKIP_CACHE(name, gb, num) ;
+# ifdef ALT_BITSTREAM_READER_LE
+#   define SHOW_UBITS(name, gb, num)\
+        ((name##_cache) & (NEG_USR32(0xffffffff,num)))
+#   define SHOW_SBITS(name, gb, num)\
+        NEG_SSR32((name##_cache)<<(32-(num)), num)
+# else
+#   define SHOW_UBITS(name, gb, num)\
+        NEG_USR32(name##_cache, num)
+#   define SHOW_SBITS(name, gb, num)\
+        NEG_SSR32(name##_cache, num)
+# endif
+#   define GET_CACHE(name, gb)\
+        ((uint32_t)name##_cache)
+static inline int get_bits_count(GetBitContext *s){
+    return s->index;
+}
+static inline void skip_bits_long(GetBitContext *s, int n){
+    s->index += n;
+}
+#elif defined LIBMPEG2_BITSTREAM_READER
+//libmpeg2 like reader
+#   define MIN_CACHE_BITS 17
+#   define OPEN_READER(name, gb)\
+        int name##_bit_count=(gb)->bit_count;\
+        int name##_cache= (gb)->cache;\
+        uint8_t * name##_buffer_ptr=(gb)->buffer_ptr;\
+#   define CLOSE_READER(name, gb)\
+        (gb)->bit_count= name##_bit_count;\
+        (gb)->cache= name##_cache;\
+        (gb)->buffer_ptr= name##_buffer_ptr;\
+#   define UPDATE_CACHE(name, gb)\
+    if(name##_bit_count >= 0){\
+        name##_cache+= AV_RB16(name##_buffer_ptr) << name##_bit_count; \
+        name##_buffer_ptr+=2;\
+        name##_bit_count-= 16;\
+    }\
+#   define SKIP_CACHE(name, gb, num)\
+        name##_cache <<= (num);\
+#   define SKIP_COUNTER(name, gb, num)\
+        name##_bit_count += (num);\
+#   define SKIP_BITS(name, gb, num)\
+        {\
+            SKIP_CACHE(name, gb, num)\
+            SKIP_COUNTER(name, gb, num)\
+        }\
+#   define LAST_SKIP_BITS(name, gb, num) SKIP_BITS(name, gb, num)
+#   define LAST_SKIP_CACHE(name, gb, num) SKIP_CACHE(name, gb, num)
+#   define SHOW_UBITS(name, gb, num)\
+        NEG_USR32(name##_cache, num)
+#   define SHOW_SBITS(name, gb, num)\
+        NEG_SSR32(name##_cache, num)
+#   define GET_CACHE(name, gb)\
+        ((uint32_t)name##_cache)
+static inline int get_bits_count(GetBitContext *s){
+    return (s->buffer_ptr - s->buffer)*8 - 16 + s->bit_count;
+}
+static inline void skip_bits_long(GetBitContext *s, int n){
+    OPEN_READER(re, s)
+    re_bit_count += n;
+    re_buffer_ptr += 2*(re_bit_count>>4);
+    re_bit_count &= 15;
+    re_cache = ((re_buffer_ptr[-2]<<8) + re_buffer_ptr[-1]) << (16+re_bit_count);
+    UPDATE_CACHE(re, s)
+    CLOSE_READER(re, s)
+}
+#elif defined A32_BITSTREAM_READER
+#   define MIN_CACHE_BITS 32
+#   define OPEN_READER(name, gb)\
+        int name##_bit_count=(gb)->bit_count;\
+        uint32_t name##_cache0= (gb)->cache0;\
+        uint32_t name##_cache1= (gb)->cache1;\
+        uint32_t * name##_buffer_ptr=(gb)->buffer_ptr;\
+#   define CLOSE_READER(name, gb)\
+        (gb)->bit_count= name##_bit_count;\
+        (gb)->cache0= name##_cache0;\
+        (gb)->cache1= name##_cache1;\
+        (gb)->buffer_ptr= name##_buffer_ptr;\
+#   define UPDATE_CACHE(name, gb)\
+    if(name##_bit_count > 0){\
+        const uint32_t next= be2me_32( *name##_buffer_ptr );\
+        name##_cache0 |= NEG_USR32(next,name##_bit_count);\
+        name##_cache1 |= next<<name##_bit_count;\
+        name##_buffer_ptr++;\
+        name##_bit_count-= 32;\
+    }\
+#if ARCH_X86
+#   define SKIP_CACHE(name, gb, num)\
+        __asm__(\
+            "shldl %2, %1, %0          \n\t"\
+            "shll %2, %1               \n\t"\
+            : "+r" (name##_cache0), "+r" (name##_cache1)\
+            : "Ic" ((uint8_t)(num))\
+           );
+#else
+#   define SKIP_CACHE(name, gb, num)\
+        name##_cache0 <<= (num);\
+        name##_cache0 |= NEG_USR32(name##_cache1,num);\
+        name##_cache1 <<= (num);
+#endif
+#   define SKIP_COUNTER(name, gb, num)\
+        name##_bit_count += (num);\
+#   define SKIP_BITS(name, gb, num)\
+        {\
+            SKIP_CACHE(name, gb, num)\
+            SKIP_COUNTER(name, gb, num)\
+        }\
+#   define LAST_SKIP_BITS(name, gb, num) SKIP_BITS(name, gb, num)
+#   define LAST_SKIP_CACHE(name, gb, num) SKIP_CACHE(name, gb, num)
+#   define SHOW_UBITS(name, gb, num)\
+        NEG_USR32(name##_cache0, num)
+#   define SHOW_SBITS(name, gb, num)\
+        NEG_SSR32(name##_cache0, num)
+#   define GET_CACHE(name, gb)\
+        (name##_cache0)
+static inline int get_bits_count(GetBitContext *s){
+    return ((uint8_t*)s->buffer_ptr - s->buffer)*8 - 32 + s->bit_count;
+}
+static inline void skip_bits_long(GetBitContext *s, int n){
+    OPEN_READER(re, s)
+    re_bit_count += n;
+    re_buffer_ptr += re_bit_count>>5;
+    re_bit_count &= 31;
+    re_cache0 = be2me_32( re_buffer_ptr[-1] ) << re_bit_count;
+    re_cache1 = 0;
+    UPDATE_CACHE(re, s)
+    CLOSE_READER(re, s)
+}
+#endif
+/**
+ * read mpeg1 dc style vlc (sign bit + mantisse with no MSB).
+ * if MSB not set it is negative
+ * @param n length in bits
+ * @author BERO
+ */
+static inline int get_xbits(GetBitContext *s, int n){
+    register int sign;
+    register int32_t cache;
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+    cache = GET_CACHE(re,s);
+    sign=(~cache)>>31;
+    LAST_SKIP_BITS(re, s, n)
+    CLOSE_READER(re, s)
+    return (NEG_USR32(sign ^ cache, n) ^ sign) - sign;
+}
+static inline int get_sbits(GetBitContext *s, int n){
+    register int tmp;
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+    tmp= SHOW_SBITS(re, s, n);
+    LAST_SKIP_BITS(re, s, n)
+    CLOSE_READER(re, s)
+    return tmp;
+}
+/**
+ * reads 1-17 bits.
+ * Note, the alt bitstream reader can read up to 25 bits, but the libmpeg2 reader can't
+ */
+static inline unsigned int get_bits(GetBitContext *s, int n){
+    register int tmp;
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+    tmp= SHOW_UBITS(re, s, n);
+    LAST_SKIP_BITS(re, s, n)
+    CLOSE_READER(re, s)
+    return tmp;
+}
+/**
+ * shows 1-17 bits.
+ * Note, the alt bitstream reader can read up to 25 bits, but the libmpeg2 reader can't
+ */
+static inline unsigned int show_bits(GetBitContext *s, int n){
+    register int tmp;
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+    tmp= SHOW_UBITS(re, s, n);
+//    CLOSE_READER(re, s)
+    return tmp;
+}
+static inline void skip_bits(GetBitContext *s, int n){
+ //Note gcc seems to optimize this to s->index+=n for the ALT_READER :))
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+    LAST_SKIP_BITS(re, s, n)
+    CLOSE_READER(re, s)
+}
+static inline unsigned int get_bits1(GetBitContext *s){
+#ifdef ALT_BITSTREAM_READER
+    int index= s->index;
+    uint8_t result= s->buffer[ index>>3 ];
+#ifdef ALT_BITSTREAM_READER_LE
+    result>>= (index&0x07);
+    result&= 1;
+#else
+    result<<= (index&0x07);
+    result>>= 8 - 1;
+#endif
+    index++;
+    s->index= index;
+    return result;
+#else
+    return get_bits(s, 1);
+#endif
+}
+static inline unsigned int show_bits1(GetBitContext *s){
+    return show_bits(s, 1);
+}
+static inline void skip_bits1(GetBitContext *s){
+    skip_bits(s, 1);
+}
+/**
+ * reads 0-32 bits.
+ */
+static inline unsigned int get_bits_long(GetBitContext *s, int n){
+    if(n<=17) return get_bits(s, n);
+    else{
+#ifdef ALT_BITSTREAM_READER_LE
+        int ret= get_bits(s, 16);
+        return ret | (get_bits(s, n-16) << 16);
+#else
+        int ret= get_bits(s, 16) << (n-16);
+        return ret | get_bits(s, n-16);
+#endif
+    }
+}
+#if 0
+/**
+ * reads 0-32 bits as a signed integer.
+ */
+static inline int get_sbits_long(GetBitContext *s, int n) {
+    return sign_extend(get_bits_long(s, n), n);
+}
+#endif
+/**
+ * shows 0-32 bits.
+ */
+static inline unsigned int show_bits_long(GetBitContext *s, int n){
+    if(n<=17) return show_bits(s, n);
+    else{
+        GetBitContext gb= *s;
+        return get_bits_long(&gb, n);
+    }
+}
+#if 0
+static inline int check_marker(GetBitContext *s, const char *msg)
+{
+    int bit= get_bits1(s);
+    if(!bit)
+        printf("Marker bit missing %s\n", msg);
+    return bit;
+}
+#endif
+/**
+ * init GetBitContext.
+ * @param buffer bitstream buffer, must be FF_INPUT_BUFFER_PADDING_SIZE bytes larger then the actual read bits
+ * because some optimized bitstream readers read 32 or 64 bit at once and could read over the end
+ * @param bit_size the size of the buffer in bits
+ */
+static inline void init_get_bits(GetBitContext *s,
+                   const uint8_t *buffer, int bit_size)
+{
+    int buffer_size= (bit_size+7)>>3;
+    if(buffer_size < 0 || bit_size < 0) {
+        buffer_size = bit_size = 0;
+        buffer = NULL;
+    }
+    s->buffer= buffer;
+    s->size_in_bits= bit_size;
+    s->buffer_end= buffer + buffer_size;
+#ifdef ALT_BITSTREAM_READER
+    s->index=0;
+#elif defined LIBMPEG2_BITSTREAM_READER
+    s->buffer_ptr = (uint8_t*)((intptr_t)buffer&(~1));
+    s->bit_count = 16 + 8*((intptr_t)buffer&1);
+    skip_bits_long(s, 0);
+#elif defined A32_BITSTREAM_READER
+    s->buffer_ptr = (uint32_t*)((intptr_t)buffer&(~3));
+    s->bit_count = 32 + 8*((intptr_t)buffer&3);
+    skip_bits_long(s, 0);
+#endif
+}
+static inline void align_get_bits(GetBitContext *s)
+{
+    int n= (-get_bits_count(s)) & 7;
+    if(n) skip_bits(s, n);
+}
+#define init_vlc(vlc, nb_bits, nb_codes,\
+                 bits, bits_wrap, bits_size,\
+                 codes, codes_wrap, codes_size,\
+                 flags)\
+        init_vlc_sparse(vlc, nb_bits, nb_codes,\
+                 bits, bits_wrap, bits_size,\
+                 codes, codes_wrap, codes_size,\
+                 NULL, 0, 0, flags)
+int init_vlc_sparse(VLC *vlc, int nb_bits, int nb_codes,
+             const void *bits, int bits_wrap, int bits_size,
+             const void *codes, int codes_wrap, int codes_size,
+             const void *symbols, int symbols_wrap, int symbols_size,
+             int flags);
+#define INIT_VLC_USE_STATIC 1 ///< VERY strongly deprecated and forbidden
+#define INIT_VLC_LE         2
+#define INIT_VLC_USE_NEW_STATIC 4
+void free_vlc(VLC *vlc);
+#define INIT_VLC_STATIC(vlc, bits, a,b,c,d,e,f,g, static_size)\
+{\
+    static VLC_TYPE table[static_size][2];\
+    (vlc)->table= table;\
+    (vlc)->table_allocated= static_size;\
+    init_vlc(vlc, bits, a,b,c,d,e,f,g, INIT_VLC_USE_NEW_STATIC);\
+}
+/**
+ *
+ * if the vlc code is invalid and max_depth=1 than no bits will be removed
+ * if the vlc code is invalid and max_depth>1 than the number of bits removed
+ * is undefined
+ */
+#define GET_VLC(code, name, gb, table, bits, max_depth)\
+{\
+    int n, index, nb_bits;\
+\
+    index= SHOW_UBITS(name, gb, bits);\
+    code = table[index][0];\
+    n    = table[index][1];\
+\
+    if(max_depth > 1 && n < 0){\
+        LAST_SKIP_BITS(name, gb, bits)\
+        UPDATE_CACHE(name, gb)\
+\
+        nb_bits = -n;\
+\
+        index= SHOW_UBITS(name, gb, nb_bits) + code;\
+        code = table[index][0];\
+        n    = table[index][1];\
+        if(max_depth > 2 && n < 0){\
+            LAST_SKIP_BITS(name, gb, nb_bits)\
+            UPDATE_CACHE(name, gb)\
+\
+            nb_bits = -n;\
+\
+            index= SHOW_UBITS(name, gb, nb_bits) + code;\
+            code = table[index][0];\
+            n    = table[index][1];\
+        }\
+    }\
+    SKIP_BITS(name, gb, n)\
+}
+#define GET_RL_VLC(level, run, name, gb, table, bits, max_depth, need_update)\
+{\
+    int n, index, nb_bits;\
+\
+    index= SHOW_UBITS(name, gb, bits);\
+    level = table[index].level;\
+    n     = table[index].len;\
+\
+    if(max_depth > 1 && n < 0){\
+        SKIP_BITS(name, gb, bits)\
+        if(need_update){\
+            UPDATE_CACHE(name, gb)\
+        }\
+\
+        nb_bits = -n;\
+\
+        index= SHOW_UBITS(name, gb, nb_bits) + level;\
+        level = table[index].level;\
+        n     = table[index].len;\
+    }\
+    run= table[index].run;\
+    SKIP_BITS(name, gb, n)\
+}
+/**
+ * parses a vlc code, faster then get_vlc()
+ * @param bits is the number of bits which will be read at once, must be
+ *             identical to nb_bits in init_vlc()
+ * @param max_depth is the number of times bits bits must be read to completely
+ *                  read the longest vlc code
+ *                  = (max_vlc_length + bits - 1) / bits
+ */
+static inline int get_vlc2(GetBitContext *s, VLC_TYPE (*table)[2],
+                                  int bits, int max_depth)
+{
+    int code;
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+    GET_VLC(code, re, s, table, bits, max_depth)
+    CLOSE_READER(re, s)
+    return code;
+}
+//#define TRACE
+#ifdef TRACE
+static inline void print_bin(int bits, int n){
+    int i;
+    for(i=n-1; i>=0; i--){
+        printf("%d", (bits>>i)&1);
+    }
+    for(i=n; i<24; i++)
+        printf(" ");
+}
+static inline int get_bits_trace(GetBitContext *s, int n, char *file, const char *func, int line){
+    int r= get_bits(s, n);
+    print_bin(r, n);
+    printf("%5d %2d %3d bit @%5d in %s %s:%d\n", r, n, r, get_bits_count(s)-n, file, func, line);
+    return r;
+}
+static inline int get_vlc_trace(GetBitContext *s, VLC_TYPE (*table)[2], int bits, int max_depth, char *file, const char *func, int line){
+    int show= show_bits(s, 24);
+    int pos= get_bits_count(s);
+    int r= get_vlc2(s, table, bits, max_depth);
+    int len= get_bits_count(s) - pos;
+    int bits2= show>>(24-len);
+    print_bin(bits2, len);
+    printf("%5d %2d %3d vlc @%5d in %s %s:%d\n", bits2, len, r, pos, file, func, line);
+    return r;
+}
+static inline int get_xbits_trace(GetBitContext *s, int n, char *file, const char *func, int line){
+    int show= show_bits(s, n);
+    int r= get_xbits(s, n);
+    print_bin(show, n);
+    printf("%5d %2d %3d xbt @%5d in %s %s:%d\n", show, n, r, get_bits_count(s)-n, file, func, line);
+    return r;
+}
+#define get_bits(s, n)  get_bits_trace(s, n, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+#define get_bits1(s)    get_bits_trace(s, 1, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+#define get_xbits(s, n) get_xbits_trace(s, n, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+#define get_vlc(s, vlc)            get_vlc_trace(s, (vlc)->table, (vlc)->bits, 3, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+#define get_vlc2(s, tab, bits, max) get_vlc_trace(s, tab, bits, max, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+#define tprintf(p, ...) printf
+#else //TRACE
+#define tprintf(p, ...) {}
+#endif
+static inline int decode012(GetBitContext *gb){
+    int n;
+    n = get_bits1(gb);
+    if (n == 0)
+        return 0;
+    else
+        return get_bits1(gb) + 1;
+}
+static inline int decode210(GetBitContext *gb){
+    if (get_bits1(gb))
+        return 0;
+    else
+        return 2 - get_bits1(gb);
+}
+#endif /* BITSTREAM_H */
diff --git a/apps/codecs/libatrac/bswap.h b/apps/codecs/libatrac/bswap.h
new file mode 100644
index 0000000000..ff807815a3
--- /dev/null
+++ b/apps/codecs/libatrac/bswap.h
@@ -0,0 +1,150 @@
+/**
+ * @file bswap.h
+ * byte swap.
+ */
+#ifndef __BSWAP_H__
+#define __BSWAP_H__
+#ifdef HAVE_BYTESWAP_H
+#include <byteswap.h>
+#else
+#ifdef ROCKBOX
+#include "codecs.h"
+/* rockbox' optimised inline functions */
+#define bswap_16(x) swap16(x)
+#define bswap_32(x) swap32(x)
+static inline uint64_t ByteSwap64(uint64_t x)
+{
+    union { 
+        uint64_t ll;
+        struct {
+           uint32_t l,h;
+        } l;
+    } r;
+    r.l.l = bswap_32 (x);
+    r.l.h = bswap_32 (x>>32);
+    return r.ll;
+}
+#define bswap_64(x) ByteSwap64(x)
+#elif defined(ARCH_X86)
+static inline unsigned short ByteSwap16(unsigned short x)
+{
+  __asm("xchgb %b0,%h0" :
+        "=q" (x)        :
+        "0" (x));
+    return x;
+}
+#define bswap_16(x) ByteSwap16(x)
+static inline unsigned int ByteSwap32(unsigned int x)
+{
+#if __CPU__ > 386
+ __asm("bswap   %0":
+      "=r" (x)     :
+#else
+ __asm("xchgb   %b0,%h0\n"
+      " rorl    $16,%0\n"
+      " xchgb   %b0,%h0":
+      "=q" (x)          :
+#endif
+      "0" (x));
+  return x;
+}
+#define bswap_32(x) ByteSwap32(x)
+static inline unsigned long long int ByteSwap64(unsigned long long int x)
+{
+  register union { __extension__ uint64_t __ll;
+          uint32_t __l[2]; } __x;
+  asm("xchgl    %0,%1":
+      "=r"(__x.__l[0]),"=r"(__x.__l[1]):
+      "0"(bswap_32((unsigned long)x)),"1"(bswap_32((unsigned long)(x>>32))));
+  return __x.__ll;
+}
+#define bswap_64(x) ByteSwap64(x)
+#elif defined(ARCH_SH4)
+static inline uint16_t ByteSwap16(uint16_t x) {
+        __asm__("swap.b %0,%0":"=r"(x):"0"(x));
+        return x;
+}
+static inline uint32_t ByteSwap32(uint32_t x) {
+        __asm__(
+        "swap.b %0,%0\n"
+        "swap.w %0,%0\n"
+        "swap.b %0,%0\n"
+        :"=r"(x):"0"(x));
+        return x;
+}
+#define bswap_16(x) ByteSwap16(x)
+#define bswap_32(x) ByteSwap32(x)
+static inline uint64_t ByteSwap64(uint64_t x)
+{
+    union { 
+        uint64_t ll;
+        struct {
+           uint32_t l,h;
+        } l;
+    } r;
+    r.l.l = bswap_32 (x);
+    r.l.h = bswap_32 (x>>32);
+    return r.ll;
+}
+#define bswap_64(x) ByteSwap64(x)
+#else
+#define bswap_16(x) (((x) & 0x00ff) << 8 | ((x) & 0xff00) >> 8)
+// code from bits/byteswap.h (C) 1997, 1998 Free Software Foundation, Inc.
+#define bswap_32(x) \
+     ((((x) & 0xff000000) >> 24) | (((x) & 0x00ff0000) >>  8) | \
+      (((x) & 0x0000ff00) <<  8) | (((x) & 0x000000ff) << 24))
+/*static inline uint64_t ByteSwap64(uint64_t x)
+{
+    union { 
+        uint64_t ll;
+        uint32_t l[2]; 
+    } w, r;
+    w.ll = x;
+    r.l[0] = bswap_32 (w.l[1]);
+    r.l[1] = bswap_32 (w.l[0]);
+    return r.ll;
+}*/
+#define bswap_64(x) ByteSwap64(x)
+#endif  /* !ARCH_X86 */
+#endif  /* !HAVE_BYTESWAP_H */
+// be2me ... BigEndian to MachineEndian
+// le2me ... LittleEndian to MachineEndian
+#ifdef WORDS_BIGENDIAN
+#define be2me_16(x) (x)
+#define be2me_32(x) (x)
+#define be2me_64(x) (x)
+#define le2me_16(x) bswap_16(x)
+#define le2me_32(x) bswap_32(x)
+#define le2me_64(x) bswap_64(x)
+#else
+#define be2me_16(x) bswap_16(x)
+#define be2me_32(x) bswap_32(x)
+#define be2me_64(x) bswap_64(x)
+#define le2me_16(x) (x)
+#define le2me_32(x) (x)
+#define le2me_64(x) (x)
+#endif
+#endif /* __BSWAP_H__ */
diff --git a/apps/codecs/libatrac/bytestream.h b/apps/codecs/libatrac/bytestream.h
new file mode 100644
index 0000000000..b56f6ce743
--- /dev/null
+++ b/apps/codecs/libatrac/bytestream.h
@@ -0,0 +1,71 @@
+/*
+ * Bytestream functions
+ * copyright (c) 2006 Baptiste Coudurier <baptiste.coudurier@free.fr>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef AVCODEC_BYTESTREAM_H
+#define AVCODEC_BYTESTREAM_H
+#include <string.h>
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+#define DEF_T(type, name, bytes, read, write)                             \
+static av_always_inline type bytestream_get_ ## name(const uint8_t **b){\
+    (*b) += bytes;\
+    return read(*b - bytes);\
+}\
+static av_always_inline void bytestream_put_ ##name(uint8_t **b, const type value){\
+    write(*b, value);\
+    (*b) += bytes;\
+}
+#define DEF(name, bytes, read, write) \
+    DEF_T(unsigned int, name, bytes, read, write)
+#define DEF64(name, bytes, read, write) \
+    DEF_T(uint64_t, name, bytes, read, write)
+DEF64(le64, 8, AV_RL64, AV_WL64)
+DEF  (le32, 4, AV_RL32, AV_WL32)
+DEF  (le24, 3, AV_RL24, AV_WL24)
+DEF  (le16, 2, AV_RL16, AV_WL16)
+DEF64(be64, 8, AV_RB64, AV_WB64)
+DEF  (be32, 4, AV_RB32, AV_WB32)
+DEF  (be24, 3, AV_RB24, AV_WB24)
+DEF  (be16, 2, AV_RB16, AV_WB16)
+DEF  (byte, 1, AV_RB8 , AV_WB8 )
+#undef DEF
+#undef DEF64
+#undef DEF_T
+static av_always_inline unsigned int bytestream_get_buffer(const uint8_t **b, uint8_t *dst, unsigned int size)
+{
+    memcpy(dst, *b, size);
+    (*b) += size;
+    return size;
+}
+static av_always_inline void bytestream_put_buffer(uint8_t **b, const uint8_t *src, unsigned int size)
+{
+    memcpy(*b, src, size);
+    (*b) += size;
+}
+#endif /* AVCODEC_BYTESTREAM_H */
diff --git a/apps/codecs/libatrac/dsputil.c b/apps/codecs/libatrac/dsputil.c
new file mode 100644
index 0000000000..412a934862
--- /dev/null
+++ b/apps/codecs/libatrac/dsputil.c
@@ -0,0 +1,4114 @@
+/*
+ * DSP utils
+ * Copyright (c) 2000, 2001 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/**
+ * @file libavcodec/dsputil.c
+ * DSP utils
+ */
+#include "avcodec.h"
+#include "dsputil.h"
+/*#include "simple_idct.h"
+#include "faandct.h"
+#include "faanidct.h"
+#include "mathops.h"
+#include "h263.h"
+#include "snow.h" */
+/* snow.c */
+void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count);
+/* vorbis.c */
+void vorbis_inverse_coupling(float *mag, float *ang, int blocksize);
+/* ac3dec.c */
+void ff_ac3_downmix_c(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len);
+/* flacenc.c */
+void ff_flac_compute_autocorr(const int32_t *data, int len, int lag, double *autoc);
+/* pngdec.c */
+void ff_add_png_paeth_prediction(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp);
+/* eaidct.c */
+void ff_ea_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block);
+uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
+uint32_t ff_squareTbl[512] = {0, };
+// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
+#define pb_7f (~0UL/255 * 0x7f)
+#define pb_80 (~0UL/255 * 0x80)
+const uint8_t ff_zigzag_direct[64] = {
+    0,   1,  8, 16,  9,  2,  3, 10,
+    17, 24, 32, 25, 18, 11,  4,  5,
+    12, 19, 26, 33, 40, 48, 41, 34,
+    27, 20, 13,  6,  7, 14, 21, 28,
+    35, 42, 49, 56, 57, 50, 43, 36,
+    29, 22, 15, 23, 30, 37, 44, 51,
+    58, 59, 52, 45, 38, 31, 39, 46,
+    53, 60, 61, 54, 47, 55, 62, 63
+};
+/* Specific zigzag scan for 248 idct. NOTE that unlike the
+   specification, we interleave the fields */
+const uint8_t ff_zigzag248_direct[64] = {
+     0,  8,  1,  9, 16, 24,  2, 10,
+    17, 25, 32, 40, 48, 56, 33, 41,
+    18, 26,  3, 11,  4, 12, 19, 27,
+    34, 42, 49, 57, 50, 58, 35, 43,
+    20, 28,  5, 13,  6, 14, 21, 29,
+    36, 44, 51, 59, 52, 60, 37, 45,
+    22, 30,  7, 15, 23, 31, 38, 46,
+    53, 61, 54, 62, 39, 47, 55, 63,
+};
+/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
+DECLARE_ALIGNED_8(uint16_t, inv_zigzag_direct16[64]) = {0, };
+const uint8_t ff_alternate_horizontal_scan[64] = {
+    0,  1,   2,  3,  8,  9, 16, 17,
+    10, 11,  4,  5,  6,  7, 15, 14,
+    13, 12, 19, 18, 24, 25, 32, 33,
+    26, 27, 20, 21, 22, 23, 28, 29,
+    30, 31, 34, 35, 40, 41, 48, 49,
+    42, 43, 36, 37, 38, 39, 44, 45,
+    46, 47, 50, 51, 56, 57, 58, 59,
+    52, 53, 54, 55, 60, 61, 62, 63,
+};
+const uint8_t ff_alternate_vertical_scan[64] = {
+    0,  8,  16, 24,  1,  9,  2, 10,
+    17, 25, 32, 40, 48, 56, 57, 49,
+    41, 33, 26, 18,  3, 11,  4, 12,
+    19, 27, 34, 42, 50, 58, 35, 43,
+    51, 59, 20, 28,  5, 13,  6, 14,
+    21, 29, 36, 44, 52, 60, 37, 45,
+    53, 61, 22, 30,  7, 15, 23, 31,
+    38, 46, 54, 62, 39, 47, 55, 63,
+};
+/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
+const uint32_t ff_inverse[256]={
+         0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757,
+ 536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154,
+ 268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709,
+ 178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333,
+ 134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367,
+ 107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283,
+  89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315,
+  76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085,
+  67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498,
+  59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675,
+  53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441,
+  48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183,
+  44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712,
+  41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400,
+  38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163,
+  35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641,
+  33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573,
+  31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737,
+  29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493,
+  28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373,
+  26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368,
+  25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671,
+  24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767,
+  23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740,
+  22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751,
+  21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635,
+  20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593,
+  19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944,
+  19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933,
+  18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575,
+  17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532,
+  17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
+};
+/* Input permutation for the simple_idct_mmx */
+static const uint8_t simple_mmx_permutation[64]={
+        0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
+        0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
+        0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
+        0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
+        0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
+        0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
+        0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
+        0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
+};
+static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
+void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
+    int i;
+    int end;
+    st->scantable= src_scantable;
+    for(i=0; i<64; i++){
+        int j;
+        j = src_scantable[i];
+        st->permutated[i] = permutation[j];
+#if ARCH_PPC
+        st->inverse[j] = i;
+#endif
+    }
+    end=-1;
+    for(i=0; i<64; i++){
+        int j;
+        j = st->permutated[i];
+        if(j>end) end=j;
+        st->raster_end[i]= end;
+    }
+}
+#if CONFIG_SNOW_ENCODER //dwt is in snow.c
+static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){
+    int s, i, j;
+    const int dec_count= w==8 ? 3 : 4;
+    int tmp[32*32];
+    int level, ori;
+    static const int scale[2][2][4][4]={
+      {
+        {
+            // 9/7 8x8 dec=3
+            {268, 239, 239, 213},
+            {  0, 224, 224, 152},
+            {  0, 135, 135, 110},
+        },{
+            // 9/7 16x16 or 32x32 dec=4
+            {344, 310, 310, 280},
+            {  0, 320, 320, 228},
+            {  0, 175, 175, 136},
+            {  0, 129, 129, 102},
+        }
+      },{
+        {
+            // 5/3 8x8 dec=3
+            {275, 245, 245, 218},
+            {  0, 230, 230, 156},
+            {  0, 138, 138, 113},
+        },{
+            // 5/3 16x16 or 32x32 dec=4
+            {352, 317, 317, 286},
+            {  0, 328, 328, 233},
+            {  0, 180, 180, 140},
+            {  0, 132, 132, 105},
+        }
+      }
+    };
+    for (i = 0; i < h; i++) {
+        for (j = 0; j < w; j+=4) {
+            tmp[32*i+j+0] = (pix1[j+0] - pix2[j+0])<<4;
+            tmp[32*i+j+1] = (pix1[j+1] - pix2[j+1])<<4;
+            tmp[32*i+j+2] = (pix1[j+2] - pix2[j+2])<<4;
+            tmp[32*i+j+3] = (pix1[j+3] - pix2[j+3])<<4;
+        }
+        pix1 += line_size;
+        pix2 += line_size;
+    }
+    ff_spatial_dwt(tmp, w, h, 32, type, dec_count);
+    s=0;
+    assert(w==h);
+    for(level=0; level<dec_count; level++){
+        for(ori= level ? 1 : 0; ori<4; ori++){
+            int size= w>>(dec_count-level);
+            int sx= (ori&1) ? size : 0;
+            int stride= 32<<(dec_count-level);
+            int sy= (ori&2) ? stride>>1 : 0;
+            for(i=0; i<size; i++){
+                for(j=0; j<size; j++){
+                    int v= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori];
+                    s += FFABS(v);
+                }
+            }
+        }
+    }
+    assert(s>=0);
+    return s>>9;
+}
+static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
+    return w_c(v, pix1, pix2, line_size,  8, h, 1);
+}
+static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
+    return w_c(v, pix1, pix2, line_size,  8, h, 0);
+}
+static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
+    return w_c(v, pix1, pix2, line_size, 16, h, 1);
+}
+static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
+    return w_c(v, pix1, pix2, line_size, 16, h, 0);
+}
+int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
+    return w_c(v, pix1, pix2, line_size, 32, h, 1);
+}
+int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
+    return w_c(v, pix1, pix2, line_size, 32, h, 0);
+}
+#endif
+/**
+ * Copies a rectangular area of samples to a temporary buffer and replicates the boarder samples.
+ * @param buf destination buffer
+ * @param src source buffer
+ * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
+ * @param block_w width of block
+ * @param block_h height of block
+ * @param src_x x coordinate of the top left sample of the block in the source buffer
+ * @param src_y y coordinate of the top left sample of the block in the source buffer
+ * @param w width of the source buffer
+ * @param h height of the source buffer
+ */
+void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
+                                    int src_x, int src_y, int w, int h){
+    int x, y;
+    int start_y, start_x, end_y, end_x;
+    if(src_y>= h){
+        src+= (h-1-src_y)*linesize;
+        src_y=h-1;
+    }else if(src_y<=-block_h){
+        src+= (1-block_h-src_y)*linesize;
+        src_y=1-block_h;
+    }
+    if(src_x>= w){
+        src+= (w-1-src_x);
+        src_x=w-1;
+    }else if(src_x<=-block_w){
+        src+= (1-block_w-src_x);
+        src_x=1-block_w;
+    }
+    start_y= FFMAX(0, -src_y);
+    start_x= FFMAX(0, -src_x);
+    end_y= FFMIN(block_h, h-src_y);
+    end_x= FFMIN(block_w, w-src_x);
+    // copy existing part
+    for(y=start_y; y<end_y; y++){
+        for(x=start_x; x<end_x; x++){
+            buf[x + y*linesize]= src[x + y*linesize];
+        }
+    }
+    //top
+    for(y=0; y<start_y; y++){
+        for(x=start_x; x<end_x; x++){
+            buf[x + y*linesize]= buf[x + start_y*linesize];
+        }
+    }
+    //bottom
+    for(y=end_y; y<block_h; y++){
+        for(x=start_x; x<end_x; x++){
+            buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
+        }
+    }
+    for(y=0; y<block_h; y++){
+       //left
+        for(x=0; x<start_x; x++){
+            buf[x + y*linesize]= buf[start_x + y*linesize];
+        }
+       //right
+        for(x=end_x; x<block_w; x++){
+            buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
+        }
+    }
+}
+#if 0
+static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
+{
+    int i;
+    /* read the pixels */
+    for(i=0;i<8;i++) {
+        block[0] = pixels[0];
+        block[1] = pixels[1];
+        block[2] = pixels[2];
+        block[3] = pixels[3];
+        block[4] = pixels[4];
+        block[5] = pixels[5];
+        block[6] = pixels[6];
+        block[7] = pixels[7];
+        pixels += line_size;
+        block += 8;
+    }
+}
+static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
+                          const uint8_t *s2, int stride){
+    int i;
+    /* read the pixels */
+    for(i=0;i<8;i++) {
+        block[0] = s1[0] - s2[0];
+        block[1] = s1[1] - s2[1];
+        block[2] = s1[2] - s2[2];
+        block[3] = s1[3] - s2[3];
+        block[4] = s1[4] - s2[4];
+        block[5] = s1[5] - s2[5];
+        block[6] = s1[6] - s2[6];
+        block[7] = s1[7] - s2[7];
+        s1 += stride;
+        s2 += stride;
+        block += 8;
+    }
+}
+static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
+                                 int line_size)
+{
+    int i;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+    /* read the pixels */
+    for(i=0;i<8;i++) {
+        pixels[0] = cm[block[0]];
+        pixels[1] = cm[block[1]];
+        pixels[2] = cm[block[2]];
+        pixels[3] = cm[block[3]];
+        pixels[4] = cm[block[4]];
+        pixels[5] = cm[block[5]];
+        pixels[6] = cm[block[6]];
+        pixels[7] = cm[block[7]];
+        pixels += line_size;
+        block += 8;
+    }
+}
+static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
+                                 int line_size)
+{
+    int i;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+    /* read the pixels */
+    for(i=0;i<4;i++) {
+        pixels[0] = cm[block[0]];
+        pixels[1] = cm[block[1]];
+        pixels[2] = cm[block[2]];
+        pixels[3] = cm[block[3]];
+        pixels += line_size;
+        block += 8;
+    }
+}
+static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
+                                 int line_size)
+{
+    int i;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+    /* read the pixels */
+    for(i=0;i<2;i++) {
+        pixels[0] = cm[block[0]];
+        pixels[1] = cm[block[1]];
+        pixels += line_size;
+        block += 8;
+    }
+}
+static void put_signed_pixels_clamped_c(const DCTELEM *block,
+                                        uint8_t *restrict pixels,
+                                        int line_size)
+{
+    int i, j;
+    for (i = 0; i < 8; i++) {
+        for (j = 0; j < 8; j++) {
+            if (*block < -128)
+                *pixels = 0;
+            else if (*block > 127)
+                *pixels = 255;
+            else
+                *pixels = (uint8_t)(*block + 128);
+            block++;
+            pixels++;
+        }
+        pixels += (line_size - 8);
+    }
+}
+static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
+                          int line_size)
+{
+    int i;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+    /* read the pixels */
+    for(i=0;i<8;i++) {
+        pixels[0] = cm[pixels[0] + block[0]];
+        pixels[1] = cm[pixels[1] + block[1]];
+        pixels[2] = cm[pixels[2] + block[2]];
+        pixels[3] = cm[pixels[3] + block[3]];
+        pixels[4] = cm[pixels[4] + block[4]];
+        pixels[5] = cm[pixels[5] + block[5]];
+        pixels[6] = cm[pixels[6] + block[6]];
+        pixels[7] = cm[pixels[7] + block[7]];
+        pixels += line_size;
+        block += 8;
+    }
+}
+static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
+                          int line_size)
+{
+    int i;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+    /* read the pixels */
+    for(i=0;i<4;i++) {
+        pixels[0] = cm[pixels[0] + block[0]];
+        pixels[1] = cm[pixels[1] + block[1]];
+        pixels[2] = cm[pixels[2] + block[2]];
+        pixels[3] = cm[pixels[3] + block[3]];
+        pixels += line_size;
+        block += 8;
+    }
+}
+static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
+                          int line_size)
+{
+    int i;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+    /* read the pixels */
+    for(i=0;i<2;i++) {
+        pixels[0] = cm[pixels[0] + block[0]];
+        pixels[1] = cm[pixels[1] + block[1]];
+        pixels += line_size;
+        block += 8;
+    }
+}
+static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
+{
+    int i;
+    for(i=0;i<8;i++) {
+        pixels[0] += block[0];
+        pixels[1] += block[1];
+        pixels[2] += block[2];
+        pixels[3] += block[3];
+        pixels[4] += block[4];
+        pixels[5] += block[5];
+        pixels[6] += block[6];
+        pixels[7] += block[7];
+        pixels += line_size;
+        block += 8;
+    }
+}
+static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
+{
+    int i;
+    for(i=0;i<4;i++) {
+        pixels[0] += block[0];
+        pixels[1] += block[1];
+        pixels[2] += block[2];
+        pixels[3] += block[3];
+        pixels += line_size;
+        block += 4;
+    }
+}
+static int sum_abs_dctelem_c(DCTELEM *block)
+{
+    int sum=0, i;
+    for(i=0; i<64; i++)
+        sum+= FFABS(block[i]);
+    return sum;
+}
+#if 0
+#define PIXOP2(OPNAME, OP) \
+static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+    int i;\
+    for(i=0; i<h; i++){\
+        OP(*((uint64_t*)block), AV_RN64(pixels));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+\
+static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+    int i;\
+    for(i=0; i<h; i++){\
+        const uint64_t a= AV_RN64(pixels  );\
+        const uint64_t b= AV_RN64(pixels+1);\
+        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+\
+static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+    int i;\
+    for(i=0; i<h; i++){\
+        const uint64_t a= AV_RN64(pixels  );\
+        const uint64_t b= AV_RN64(pixels+1);\
+        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+\
+static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+    int i;\
+    for(i=0; i<h; i++){\
+        const uint64_t a= AV_RN64(pixels          );\
+        const uint64_t b= AV_RN64(pixels+line_size);\
+        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+\
+static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+    int i;\
+    for(i=0; i<h; i++){\
+        const uint64_t a= AV_RN64(pixels          );\
+        const uint64_t b= AV_RN64(pixels+line_size);\
+        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+\
+static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+        int i;\
+        const uint64_t a= AV_RN64(pixels  );\
+        const uint64_t b= AV_RN64(pixels+1);\
+        uint64_t l0=  (a&0x0303030303030303ULL)\
+                    + (b&0x0303030303030303ULL)\
+                    + 0x0202020202020202ULL;\
+        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+        uint64_t l1,h1;\
+\
+        pixels+=line_size;\
+        for(i=0; i<h; i+=2){\
+            uint64_t a= AV_RN64(pixels  );\
+            uint64_t b= AV_RN64(pixels+1);\
+            l1=  (a&0x0303030303030303ULL)\
+               + (b&0x0303030303030303ULL);\
+            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
+            pixels+=line_size;\
+            block +=line_size;\
+            a= AV_RN64(pixels  );\
+            b= AV_RN64(pixels+1);\
+            l0=  (a&0x0303030303030303ULL)\
+               + (b&0x0303030303030303ULL)\
+               + 0x0202020202020202ULL;\
+            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
+            pixels+=line_size;\
+            block +=line_size;\
+        }\
+}\
+\
+static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+        int i;\
+        const uint64_t a= AV_RN64(pixels  );\
+        const uint64_t b= AV_RN64(pixels+1);\
+        uint64_t l0=  (a&0x0303030303030303ULL)\
+                    + (b&0x0303030303030303ULL)\
+                    + 0x0101010101010101ULL;\
+        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+        uint64_t l1,h1;\
+\
+        pixels+=line_size;\
+        for(i=0; i<h; i+=2){\
+            uint64_t a= AV_RN64(pixels  );\
+            uint64_t b= AV_RN64(pixels+1);\
+            l1=  (a&0x0303030303030303ULL)\
+               + (b&0x0303030303030303ULL);\
+            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
+            pixels+=line_size;\
+            block +=line_size;\
+            a= AV_RN64(pixels  );\
+            b= AV_RN64(pixels+1);\
+            l0=  (a&0x0303030303030303ULL)\
+               + (b&0x0303030303030303ULL)\
+               + 0x0101010101010101ULL;\
+            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
+            pixels+=line_size;\
+            block +=line_size;\
+        }\
+}\
+\
+CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels_c    , 8)\
+CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
+#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
+#else // 64 bit variant
+#define PIXOP2(OPNAME, OP) \
+static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        OP(*((uint16_t*)(block  )), AV_RN16(pixels  ));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
+        OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _pixels8_c(block, pixels, line_size, h);\
+}\
+\
+static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+                                                int src_stride1, int src_stride2, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        uint32_t a,b;\
+        a= AV_RN32(&src1[i*src_stride1  ]);\
+        b= AV_RN32(&src2[i*src_stride2  ]);\
+        OP(*((uint32_t*)&dst[i*dst_stride  ]), no_rnd_avg32(a, b));\
+        a= AV_RN32(&src1[i*src_stride1+4]);\
+        b= AV_RN32(&src2[i*src_stride2+4]);\
+        OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
+    }\
+}\
+\
+static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+                                                int src_stride1, int src_stride2, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        uint32_t a,b;\
+        a= AV_RN32(&src1[i*src_stride1  ]);\
+        b= AV_RN32(&src2[i*src_stride2  ]);\
+        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
+        a= AV_RN32(&src1[i*src_stride1+4]);\
+        b= AV_RN32(&src2[i*src_stride2+4]);\
+        OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
+    }\
+}\
+\
+static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+                                                int src_stride1, int src_stride2, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        uint32_t a,b;\
+        a= AV_RN32(&src1[i*src_stride1  ]);\
+        b= AV_RN32(&src2[i*src_stride2  ]);\
+        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
+    }\
+}\
+\
+static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+                                                int src_stride1, int src_stride2, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        uint32_t a,b;\
+        a= AV_RN16(&src1[i*src_stride1  ]);\
+        b= AV_RN16(&src2[i*src_stride2  ]);\
+        OP(*((uint16_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
+    }\
+}\
+\
+static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+                                                int src_stride1, int src_stride2, int h){\
+    OPNAME ## _pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
+    OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
+}\
+\
+static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+                                                int src_stride1, int src_stride2, int h){\
+    OPNAME ## _no_rnd_pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
+    OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
+}\
+\
+static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
+}\
+\
+static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
+}\
+\
+static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
+}\
+\
+static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
+}\
+\
+static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
+                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        uint32_t a, b, c, d, l0, l1, h0, h1;\
+        a= AV_RN32(&src1[i*src_stride1]);\
+        b= AV_RN32(&src2[i*src_stride2]);\
+        c= AV_RN32(&src3[i*src_stride3]);\
+        d= AV_RN32(&src4[i*src_stride4]);\
+        l0=  (a&0x03030303UL)\
+           + (b&0x03030303UL)\
+           + 0x02020202UL;\
+        h0= ((a&0xFCFCFCFCUL)>>2)\
+          + ((b&0xFCFCFCFCUL)>>2);\
+        l1=  (c&0x03030303UL)\
+           + (d&0x03030303UL);\
+        h1= ((c&0xFCFCFCFCUL)>>2)\
+          + ((d&0xFCFCFCFCUL)>>2);\
+        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+        a= AV_RN32(&src1[i*src_stride1+4]);\
+        b= AV_RN32(&src2[i*src_stride2+4]);\
+        c= AV_RN32(&src3[i*src_stride3+4]);\
+        d= AV_RN32(&src4[i*src_stride4+4]);\
+        l0=  (a&0x03030303UL)\
+           + (b&0x03030303UL)\
+           + 0x02020202UL;\
+        h0= ((a&0xFCFCFCFCUL)>>2)\
+          + ((b&0xFCFCFCFCUL)>>2);\
+        l1=  (c&0x03030303UL)\
+           + (d&0x03030303UL);\
+        h1= ((c&0xFCFCFCFCUL)>>2)\
+          + ((d&0xFCFCFCFCUL)>>2);\
+        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+    }\
+}\
+\
+static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
+}\
+\
+static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
+}\
+\
+static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
+}\
+\
+static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
+}\
+\
+static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
+                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        uint32_t a, b, c, d, l0, l1, h0, h1;\
+        a= AV_RN32(&src1[i*src_stride1]);\
+        b= AV_RN32(&src2[i*src_stride2]);\
+        c= AV_RN32(&src3[i*src_stride3]);\
+        d= AV_RN32(&src4[i*src_stride4]);\
+        l0=  (a&0x03030303UL)\
+           + (b&0x03030303UL)\
+           + 0x01010101UL;\
+        h0= ((a&0xFCFCFCFCUL)>>2)\
+          + ((b&0xFCFCFCFCUL)>>2);\
+        l1=  (c&0x03030303UL)\
+           + (d&0x03030303UL);\
+        h1= ((c&0xFCFCFCFCUL)>>2)\
+          + ((d&0xFCFCFCFCUL)>>2);\
+        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+        a= AV_RN32(&src1[i*src_stride1+4]);\
+        b= AV_RN32(&src2[i*src_stride2+4]);\
+        c= AV_RN32(&src3[i*src_stride3+4]);\
+        d= AV_RN32(&src4[i*src_stride4+4]);\
+        l0=  (a&0x03030303UL)\
+           + (b&0x03030303UL)\
+           + 0x01010101UL;\
+        h0= ((a&0xFCFCFCFCUL)>>2)\
+          + ((b&0xFCFCFCFCUL)>>2);\
+        l1=  (c&0x03030303UL)\
+           + (d&0x03030303UL);\
+        h1= ((c&0xFCFCFCFCUL)>>2)\
+          + ((d&0xFCFCFCFCUL)>>2);\
+        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+    }\
+}\
+static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
+                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
+    OPNAME ## _pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
+    OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
+}\
+static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
+                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
+    OPNAME ## _no_rnd_pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
+    OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
+}\
+\
+static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+        int i, a0, b0, a1, b1;\
+        a0= pixels[0];\
+        b0= pixels[1] + 2;\
+        a0 += b0;\
+        b0 += pixels[2];\
+\
+        pixels+=line_size;\
+        for(i=0; i<h; i+=2){\
+            a1= pixels[0];\
+            b1= pixels[1];\
+            a1 += b1;\
+            b1 += pixels[2];\
+\
+            block[0]= (a1+a0)>>2; /* FIXME non put */\
+            block[1]= (b1+b0)>>2;\
+\
+            pixels+=line_size;\
+            block +=line_size;\
+\
+            a0= pixels[0];\
+            b0= pixels[1] + 2;\
+            a0 += b0;\
+            b0 += pixels[2];\
+\
+            block[0]= (a1+a0)>>2;\
+            block[1]= (b1+b0)>>2;\
+            pixels+=line_size;\
+            block +=line_size;\
+        }\
+}\
+\
+static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+        int i;\
+        const uint32_t a= AV_RN32(pixels  );\
+        const uint32_t b= AV_RN32(pixels+1);\
+        uint32_t l0=  (a&0x03030303UL)\
+                    + (b&0x03030303UL)\
+                    + 0x02020202UL;\
+        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
+                   + ((b&0xFCFCFCFCUL)>>2);\
+        uint32_t l1,h1;\
+\
+        pixels+=line_size;\
+        for(i=0; i<h; i+=2){\
+            uint32_t a= AV_RN32(pixels  );\
+            uint32_t b= AV_RN32(pixels+1);\
+            l1=  (a&0x03030303UL)\
+               + (b&0x03030303UL);\
+            h1= ((a&0xFCFCFCFCUL)>>2)\
+              + ((b&0xFCFCFCFCUL)>>2);\
+            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+            pixels+=line_size;\
+            block +=line_size;\
+            a= AV_RN32(pixels  );\
+            b= AV_RN32(pixels+1);\
+            l0=  (a&0x03030303UL)\
+               + (b&0x03030303UL)\
+               + 0x02020202UL;\
+            h0= ((a&0xFCFCFCFCUL)>>2)\
+              + ((b&0xFCFCFCFCUL)>>2);\
+            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+            pixels+=line_size;\
+            block +=line_size;\
+        }\
+}\
+\
+static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+    int j;\
+    for(j=0; j<2; j++){\
+        int i;\
+        const uint32_t a= AV_RN32(pixels  );\
+        const uint32_t b= AV_RN32(pixels+1);\
+        uint32_t l0=  (a&0x03030303UL)\
+                    + (b&0x03030303UL)\
+                    + 0x02020202UL;\
+        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
+                   + ((b&0xFCFCFCFCUL)>>2);\
+        uint32_t l1,h1;\
+\
+        pixels+=line_size;\
+        for(i=0; i<h; i+=2){\
+            uint32_t a= AV_RN32(pixels  );\
+            uint32_t b= AV_RN32(pixels+1);\
+            l1=  (a&0x03030303UL)\
+               + (b&0x03030303UL);\
+            h1= ((a&0xFCFCFCFCUL)>>2)\
+              + ((b&0xFCFCFCFCUL)>>2);\
+            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+            pixels+=line_size;\
+            block +=line_size;\
+            a= AV_RN32(pixels  );\
+            b= AV_RN32(pixels+1);\
+            l0=  (a&0x03030303UL)\
+               + (b&0x03030303UL)\
+               + 0x02020202UL;\
+            h0= ((a&0xFCFCFCFCUL)>>2)\
+              + ((b&0xFCFCFCFCUL)>>2);\
+            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+            pixels+=line_size;\
+            block +=line_size;\
+        }\
+        pixels+=4-line_size*(h+1);\
+        block +=4-line_size*h;\
+    }\
+}\
+\
+static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+    int j;\
+    for(j=0; j<2; j++){\
+        int i;\
+        const uint32_t a= AV_RN32(pixels  );\
+        const uint32_t b= AV_RN32(pixels+1);\
+        uint32_t l0=  (a&0x03030303UL)\
+                    + (b&0x03030303UL)\
+                    + 0x01010101UL;\
+        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
+                   + ((b&0xFCFCFCFCUL)>>2);\
+        uint32_t l1,h1;\
+\
+        pixels+=line_size;\
+        for(i=0; i<h; i+=2){\
+            uint32_t a= AV_RN32(pixels  );\
+            uint32_t b= AV_RN32(pixels+1);\
+            l1=  (a&0x03030303UL)\
+               + (b&0x03030303UL);\
+            h1= ((a&0xFCFCFCFCUL)>>2)\
+              + ((b&0xFCFCFCFCUL)>>2);\
+            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+            pixels+=line_size;\
+            block +=line_size;\
+            a= AV_RN32(pixels  );\
+            b= AV_RN32(pixels+1);\
+            l0=  (a&0x03030303UL)\
+               + (b&0x03030303UL)\
+               + 0x01010101UL;\
+            h0= ((a&0xFCFCFCFCUL)>>2)\
+              + ((b&0xFCFCFCFCUL)>>2);\
+            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+            pixels+=line_size;\
+            block +=line_size;\
+        }\
+        pixels+=4-line_size*(h+1);\
+        block +=4-line_size*h;\
+    }\
+}\
+\
+CALL_2X_PIXELS(OPNAME ## _pixels16_c  , OPNAME ## _pixels8_c  , 8)\
+CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c  , OPNAME ## _pixels8_c         , 8)\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\
+#define op_avg(a, b) a = rnd_avg32(a, b)
+#endif
+#define op_put(a, b) a = b
+PIXOP2(avg, op_avg)
+PIXOP2(put, op_put)
+#undef op_avg
+#undef op_put
+#define avg2(a,b) ((a+b+1)>>1)
+#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
+static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
+    put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h);
+}
+static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
+    put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h);
+}
+static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
+{
+    const int A=(16-x16)*(16-y16);
+    const int B=(   x16)*(16-y16);
+    const int C=(16-x16)*(   y16);
+    const int D=(   x16)*(   y16);
+    int i;
+    for(i=0; i<h; i++)
+    {
+        dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
+        dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
+        dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
+        dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8;
+        dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8;
+        dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8;
+        dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8;
+        dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
+        dst+= stride;
+        src+= stride;
+    }
+}
+void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
+                  int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
+{
+    int y, vx, vy;
+    const int s= 1<<shift;
+    width--;
+    height--;
+    for(y=0; y<h; y++){
+        int x;
+        vx= ox;
+        vy= oy;
+        for(x=0; x<8; x++){ //XXX FIXME optimize
+            int src_x, src_y, frac_x, frac_y, index;
+            src_x= vx>>16;
+            src_y= vy>>16;
+            frac_x= src_x&(s-1);
+            frac_y= src_y&(s-1);
+            src_x>>=shift;
+            src_y>>=shift;
+            if((unsigned)src_x < width){
+                if((unsigned)src_y < height){
+                    index= src_x + src_y*stride;
+                    dst[y*stride + x]= (  (  src[index         ]*(s-frac_x)
+                                           + src[index       +1]*   frac_x )*(s-frac_y)
+                                        + (  src[index+stride  ]*(s-frac_x)
+                                           + src[index+stride+1]*   frac_x )*   frac_y
+                                        + r)>>(shift*2);
+                }else{
+                    index= src_x + av_clip(src_y, 0, height)*stride;
+                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
+                                          + src[index       +1]*   frac_x )*s
+                                        + r)>>(shift*2);
+                }
+            }else{
+                if((unsigned)src_y < height){
+                    index= av_clip(src_x, 0, width) + src_y*stride;
+                    dst[y*stride + x]= (  (  src[index         ]*(s-frac_y)
+                                           + src[index+stride  ]*   frac_y )*s
+                                        + r)>>(shift*2);
+                }else{
+                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
+                    dst[y*stride + x]=    src[index         ];
+                }
+            }
+            vx+= dxx;
+            vy+= dyx;
+        }
+        ox += dxy;
+        oy += dyy;
+    }
+}
+static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    switch(width){
+    case 2: put_pixels2_c (dst, src, stride, height); break;
+    case 4: put_pixels4_c (dst, src, stride, height); break;
+    case 8: put_pixels8_c (dst, src, stride, height); break;
+    case 16:put_pixels16_c(dst, src, stride, height); break;
+    }
+}
+static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    switch(width){
+    case 2: avg_pixels2_c (dst, src, stride, height); break;
+    case 4: avg_pixels4_c (dst, src, stride, height); break;
+    case 8: avg_pixels8_c (dst, src, stride, height); break;
+    case 16:avg_pixels16_c(dst, src, stride, height); break;
+    }
+}
+static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+#if 0
+#define TPEL_WIDTH(width)\
+static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
+    void put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
+static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
+    void put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
+static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
+    void put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
+static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
+    void put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
+static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
+    void put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
+static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
+    void put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
+static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
+    void put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
+static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
+    void put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
+static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
+    void put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
+#endif
+#define H264_CHROMA_MC(OPNAME, OP)\
+static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
+    const int A=(8-x)*(8-y);\
+    const int B=(  x)*(8-y);\
+    const int C=(8-x)*(  y);\
+    const int D=(  x)*(  y);\
+    int i;\
+    \
+    assert(x<8 && y<8 && x>=0 && y>=0);\
+\
+    if(D){\
+        for(i=0; i<h; i++){\
+            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
+            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
+            dst+= stride;\
+            src+= stride;\
+        }\
+    }else{\
+        const int E= B+C;\
+        const int step= C ? stride : 1;\
+        for(i=0; i<h; i++){\
+            OP(dst[0], (A*src[0] + E*src[step+0]));\
+            OP(dst[1], (A*src[1] + E*src[step+1]));\
+            dst+= stride;\
+            src+= stride;\
+        }\
+    }\
+}\
+\
+static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
+    const int A=(8-x)*(8-y);\
+    const int B=(  x)*(8-y);\
+    const int C=(8-x)*(  y);\
+    const int D=(  x)*(  y);\
+    int i;\
+    \
+    assert(x<8 && y<8 && x>=0 && y>=0);\
+\
+    if(D){\
+        for(i=0; i<h; i++){\
+            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
+            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
+            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
+            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
+            dst+= stride;\
+            src+= stride;\
+        }\
+    }else{\
+        const int E= B+C;\
+        const int step= C ? stride : 1;\
+        for(i=0; i<h; i++){\
+            OP(dst[0], (A*src[0] + E*src[step+0]));\
+            OP(dst[1], (A*src[1] + E*src[step+1]));\
+            OP(dst[2], (A*src[2] + E*src[step+2]));\
+            OP(dst[3], (A*src[3] + E*src[step+3]));\
+            dst+= stride;\
+            src+= stride;\
+        }\
+    }\
+}\
+\
+static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
+    const int A=(8-x)*(8-y);\
+    const int B=(  x)*(8-y);\
+    const int C=(8-x)*(  y);\
+    const int D=(  x)*(  y);\
+    int i;\
+    \
+    assert(x<8 && y<8 && x>=0 && y>=0);\
+\
+    if(D){\
+        for(i=0; i<h; i++){\
+            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
+            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
+            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
+            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
+            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
+            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
+            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
+            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
+            dst+= stride;\
+            src+= stride;\
+        }\
+    }else{\
+        const int E= B+C;\
+        const int step= C ? stride : 1;\
+        for(i=0; i<h; i++){\
+            OP(dst[0], (A*src[0] + E*src[step+0]));\
+            OP(dst[1], (A*src[1] + E*src[step+1]));\
+            OP(dst[2], (A*src[2] + E*src[step+2]));\
+            OP(dst[3], (A*src[3] + E*src[step+3]));\
+            OP(dst[4], (A*src[4] + E*src[step+4]));\
+            OP(dst[5], (A*src[5] + E*src[step+5]));\
+            OP(dst[6], (A*src[6] + E*src[step+6]));\
+            OP(dst[7], (A*src[7] + E*src[step+7]));\
+            dst+= stride;\
+            src+= stride;\
+        }\
+    }\
+}
+#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
+#define op_put(a, b) a = (((b) + 32)>>6)
+H264_CHROMA_MC(put_       , op_put)
+H264_CHROMA_MC(avg_       , op_avg)
+#undef op_avg
+#undef op_put
+static void put_no_rnd_h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
+    const int A=(8-x)*(8-y);
+    const int B=(  x)*(8-y);
+    const int C=(8-x)*(  y);
+    const int D=(  x)*(  y);
+    int i;
+    assert(x<8 && y<8 && x>=0 && y>=0);
+    for(i=0; i<h; i++)
+    {
+        dst[0] = (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + 32 - 4) >> 6;
+        dst[1] = (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + 32 - 4) >> 6;
+        dst[2] = (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + 32 - 4) >> 6;
+        dst[3] = (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + 32 - 4) >> 6;
+        dst[4] = (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + 32 - 4) >> 6;
+        dst[5] = (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + 32 - 4) >> 6;
+        dst[6] = (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + 32 - 4) >> 6;
+        dst[7] = (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + 32 - 4) >> 6;
+        dst+= stride;
+        src+= stride;
+    }
+}
+#define QPEL_MC(r, OPNAME, RND, OP) \
+static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<h; i++)\
+    {\
+        OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
+        OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
+        OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
+        OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
+        OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
+        OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
+        OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
+        OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
+        dst+=dstStride;\
+        src+=srcStride;\
+    }\
+}\
+\
+static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    const int w=8;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<w; i++)\
+    {\
+        const int src0= src[0*srcStride];\
+        const int src1= src[1*srcStride];\
+        const int src2= src[2*srcStride];\
+        const int src3= src[3*srcStride];\
+        const int src4= src[4*srcStride];\
+        const int src5= src[5*srcStride];\
+        const int src6= src[6*srcStride];\
+        const int src7= src[7*srcStride];\
+        const int src8= src[8*srcStride];\
+        OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
+        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
+        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
+        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
+        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
+        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
+        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
+        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
+        dst++;\
+        src++;\
+    }\
+}\
+\
+static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    \
+    for(i=0; i<h; i++)\
+    {\
+        OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
+        OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
+        OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
+        OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
+        OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
+        OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
+        OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
+        OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
+        OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
+        OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
+        OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
+        OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
+        OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
+        OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
+        OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
+        OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
+        dst+=dstStride;\
+        src+=srcStride;\
+    }\
+}\
+\
+static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    const int w=16;\
+    for(i=0; i<w; i++)\
+    {\
+        const int src0= src[0*srcStride];\
+        const int src1= src[1*srcStride];\
+        const int src2= src[2*srcStride];\
+        const int src3= src[3*srcStride];\
+        const int src4= src[4*srcStride];\
+        const int src5= src[5*srcStride];\
+        const int src6= src[6*srcStride];\
+        const int src7= src[7*srcStride];\
+        const int src8= src[8*srcStride];\
+        const int src9= src[9*srcStride];\
+        const int src10= src[10*srcStride];\
+        const int src11= src[11*srcStride];\
+        const int src12= src[12*srcStride];\
+        const int src13= src[13*srcStride];\
+        const int src14= src[14*srcStride];\
+        const int src15= src[15*srcStride];\
+        const int src16= src[16*srcStride];\
+        OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
+        OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
+        OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
+        OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
+        OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
+        OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
+        OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
+        OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
+        OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
+        OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
+        OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
+        OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
+        OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
+        OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
+        OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
+        OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
+        dst++;\
+        src++;\
+    }\
+}\
+\
+static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## pixels8_c(dst, src, stride, 8);\
+}\
+\
+static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t half[64];\
+    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
+    OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
+}\
+\
+static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
+}\
+\
+static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t half[64];\
+    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
+    OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
+}\
+\
+static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t half[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
+    OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
+}\
+\
+static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    copy_block9(full, src, 16, stride, 9);\
+    OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
+}\
+\
+static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t half[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
+    OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
+}\
+void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfV[64];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
+}\
+void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfV[64];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
+}\
+void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfV[64];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
+}\
+void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfV[64];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full  , 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t halfH[72];\
+    uint8_t halfHV[64];\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t halfH[72];\
+    uint8_t halfHV[64];\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
+}\
+void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfV[64];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
+    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
+}\
+void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfV[64];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
+    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
+}\
+static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t halfH[72];\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
+    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
+}\
+static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## pixels16_c(dst, src, stride, 16);\
+}\
+\
+static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t half[256];\
+    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
+    OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
+}\
+\
+static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
+}\
+\
+static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t half[256];\
+    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
+    OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
+}\
+\
+static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t half[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
+    OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
+}\
+\
+static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    copy_block17(full, src, 24, stride, 17);\
+    OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
+}\
+\
+static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t half[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
+    OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
+}\
+void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfV[256];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
+}\
+void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfV[256];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
+}\
+void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfV[256];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
+}\
+void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfV[256];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full  , 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t halfH[272];\
+    uint8_t halfHV[256];\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t halfH[272];\
+    uint8_t halfHV[256];\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
+}\
+void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfV[256];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
+    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
+}\
+void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfV[256];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
+    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
+}\
+static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t halfH[272];\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
+    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
+}
+#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
+#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
+#define op_put(a, b) a = cm[((b) + 16)>>5]
+#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
+QPEL_MC(0, put_       , _       , op_put)
+QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
+QPEL_MC(0, avg_       , _       , op_avg)
+//QPEL_MC(1, avg_no_rnd , _       , op_avg)
+#undef op_avg
+#undef op_avg_no_rnd
+#undef op_put
+#undef op_put_no_rnd
+#if 1
+#define H264_LOWPASS(OPNAME, OP, OP2) \
+static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    const int h=2;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<h; i++)\
+    {\
+        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
+        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
+        dst+=dstStride;\
+        src+=srcStride;\
+    }\
+}\
+\
+static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    const int w=2;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<w; i++)\
+    {\
+        const int srcB= src[-2*srcStride];\
+        const int srcA= src[-1*srcStride];\
+        const int src0= src[0 *srcStride];\
+        const int src1= src[1 *srcStride];\
+        const int src2= src[2 *srcStride];\
+        const int src3= src[3 *srcStride];\
+        const int src4= src[4 *srcStride];\
+        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
+        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
+        dst++;\
+        src++;\
+    }\
+}\
+\
+static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+    const int h=2;\
+    const int w=2;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    src -= 2*srcStride;\
+    for(i=0; i<h+5; i++)\
+    {\
+        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
+        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
+        tmp+=tmpStride;\
+        src+=srcStride;\
+    }\
+    tmp -= tmpStride*(h+5-2);\
+    for(i=0; i<w; i++)\
+    {\
+        const int tmpB= tmp[-2*tmpStride];\
+        const int tmpA= tmp[-1*tmpStride];\
+        const int tmp0= tmp[0 *tmpStride];\
+        const int tmp1= tmp[1 *tmpStride];\
+        const int tmp2= tmp[2 *tmpStride];\
+        const int tmp3= tmp[3 *tmpStride];\
+        const int tmp4= tmp[4 *tmpStride];\
+        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
+        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
+        dst++;\
+        tmp++;\
+    }\
+}\
+static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    const int h=4;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<h; i++)\
+    {\
+        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
+        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
+        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
+        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
+        dst+=dstStride;\
+        src+=srcStride;\
+    }\
+}\
+\
+static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    const int w=4;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<w; i++)\
+    {\
+        const int srcB= src[-2*srcStride];\
+        const int srcA= src[-1*srcStride];\
+        const int src0= src[0 *srcStride];\
+        const int src1= src[1 *srcStride];\
+        const int src2= src[2 *srcStride];\
+        const int src3= src[3 *srcStride];\
+        const int src4= src[4 *srcStride];\
+        const int src5= src[5 *srcStride];\
+        const int src6= src[6 *srcStride];\
+        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
+        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
+        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
+        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
+        dst++;\
+        src++;\
+    }\
+}\
+\
+static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+    const int h=4;\
+    const int w=4;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    src -= 2*srcStride;\
+    for(i=0; i<h+5; i++)\
+    {\
+        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
+        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
+        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
+        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
+        tmp+=tmpStride;\
+        src+=srcStride;\
+    }\
+    tmp -= tmpStride*(h+5-2);\
+    for(i=0; i<w; i++)\
+    {\
+        const int tmpB= tmp[-2*tmpStride];\
+        const int tmpA= tmp[-1*tmpStride];\
+        const int tmp0= tmp[0 *tmpStride];\
+        const int tmp1= tmp[1 *tmpStride];\
+        const int tmp2= tmp[2 *tmpStride];\
+        const int tmp3= tmp[3 *tmpStride];\
+        const int tmp4= tmp[4 *tmpStride];\
+        const int tmp5= tmp[5 *tmpStride];\
+        const int tmp6= tmp[6 *tmpStride];\
+        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
+        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
+        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
+        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
+        dst++;\
+        tmp++;\
+    }\
+}\
+\
+static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    const int h=8;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<h; i++)\
+    {\
+        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
+        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
+        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
+        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
+        OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
+        OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
+        OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
+        OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
+        dst+=dstStride;\
+        src+=srcStride;\
+    }\
+}\
+\
+static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    const int w=8;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<w; i++)\
+    {\
+        const int srcB= src[-2*srcStride];\
+        const int srcA= src[-1*srcStride];\
+        const int src0= src[0 *srcStride];\
+        const int src1= src[1 *srcStride];\
+        const int src2= src[2 *srcStride];\
+        const int src3= src[3 *srcStride];\
+        const int src4= src[4 *srcStride];\
+        const int src5= src[5 *srcStride];\
+        const int src6= src[6 *srcStride];\
+        const int src7= src[7 *srcStride];\
+        const int src8= src[8 *srcStride];\
+        const int src9= src[9 *srcStride];\
+        const int src10=src[10*srcStride];\
+        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
+        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
+        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
+        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
+        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
+        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
+        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
+        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
+        dst++;\
+        src++;\
+    }\
+}\
+\
+static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+    const int h=8;\
+    const int w=8;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    src -= 2*srcStride;\
+    for(i=0; i<h+5; i++)\
+    {\
+        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
+        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
+        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
+        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
+        tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
+        tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
+        tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
+        tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
+        tmp+=tmpStride;\
+        src+=srcStride;\
+    }\
+    tmp -= tmpStride*(h+5-2);\
+    for(i=0; i<w; i++)\
+    {\
+        const int tmpB= tmp[-2*tmpStride];\
+        const int tmpA= tmp[-1*tmpStride];\
+        const int tmp0= tmp[0 *tmpStride];\
+        const int tmp1= tmp[1 *tmpStride];\
+        const int tmp2= tmp[2 *tmpStride];\
+        const int tmp3= tmp[3 *tmpStride];\
+        const int tmp4= tmp[4 *tmpStride];\
+        const int tmp5= tmp[5 *tmpStride];\
+        const int tmp6= tmp[6 *tmpStride];\
+        const int tmp7= tmp[7 *tmpStride];\
+        const int tmp8= tmp[8 *tmpStride];\
+        const int tmp9= tmp[9 *tmpStride];\
+        const int tmp10=tmp[10*tmpStride];\
+        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
+        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
+        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
+        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
+        OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
+        OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
+        OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
+        OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
+        dst++;\
+        tmp++;\
+    }\
+}\
+\
+static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
+    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
+    src += 8*srcStride;\
+    dst += 8*dstStride;\
+    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
+    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
+}\
+\
+static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
+    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
+    src += 8*srcStride;\
+    dst += 8*dstStride;\
+    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
+    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
+}\
+\
+static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
+    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
+    src += 8*srcStride;\
+    dst += 8*dstStride;\
+    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
+    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
+}\
+#define H264_MC(OPNAME, SIZE) \
+static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t half[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t half[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    uint8_t half[SIZE*SIZE];\
+    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
+    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    uint8_t half[SIZE*SIZE];\
+    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    uint8_t halfH[SIZE*SIZE];\
+    uint8_t halfV[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
+    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    uint8_t halfH[SIZE*SIZE];\
+    uint8_t halfV[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
+    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    uint8_t halfH[SIZE*SIZE];\
+    uint8_t halfV[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
+    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    uint8_t halfH[SIZE*SIZE];\
+    uint8_t halfV[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
+    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
+    int16_t tmp[SIZE*(SIZE+5)];\
+    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
+    int16_t tmp[SIZE*(SIZE+5)];\
+    uint8_t halfH[SIZE*SIZE];\
+    uint8_t halfHV[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
+    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
+    int16_t tmp[SIZE*(SIZE+5)];\
+    uint8_t halfH[SIZE*SIZE];\
+    uint8_t halfHV[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
+    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    int16_t tmp[SIZE*(SIZE+5)];\
+    uint8_t halfV[SIZE*SIZE];\
+    uint8_t halfHV[SIZE*SIZE];\
+    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
+    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    int16_t tmp[SIZE*(SIZE+5)];\
+    uint8_t halfV[SIZE*SIZE];\
+    uint8_t halfHV[SIZE*SIZE];\
+    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
+    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
+}\
+#define op_avg(a, b)  a = (((a)+cm[((b) + 16)>>5]+1)>>1)
+//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
+#define op_put(a, b)  a = cm[((b) + 16)>>5]
+#define op2_avg(a, b)  a = (((a)+cm[((b) + 512)>>10]+1)>>1)
+#define op2_put(a, b)  a = cm[((b) + 512)>>10]
+H264_LOWPASS(put_       , op_put, op2_put)
+H264_LOWPASS(avg_       , op_avg, op2_avg)
+H264_MC(put_, 2)
+H264_MC(put_, 4)
+H264_MC(put_, 8)
+H264_MC(put_, 16)
+H264_MC(avg_, 4)
+H264_MC(avg_, 8)
+H264_MC(avg_, 16)
+#undef op_avg
+#undef op_put
+#undef op2_avg
+#undef op2_put
+#endif
+#define op_scale1(x)  block[x] = av_clip_uint8( (block[x]*weight + offset) >> log2_denom )
+#define op_scale2(x)  dst[x] = av_clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
+#define H264_WEIGHT(W,H) \
+static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
+    int y; \
+    offset <<= log2_denom; \
+    if(log2_denom) offset += 1<<(log2_denom-1); \
+    for(y=0; y<H; y++, block += stride){ \
+        op_scale1(0); \
+        op_scale1(1); \
+        if(W==2) continue; \
+        op_scale1(2); \
+        op_scale1(3); \
+        if(W==4) continue; \
+        op_scale1(4); \
+        op_scale1(5); \
+        op_scale1(6); \
+        op_scale1(7); \
+        if(W==8) continue; \
+        op_scale1(8); \
+        op_scale1(9); \
+        op_scale1(10); \
+        op_scale1(11); \
+        op_scale1(12); \
+        op_scale1(13); \
+        op_scale1(14); \
+        op_scale1(15); \
+    } \
+} \
+static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \
+    int y; \
+    offset = ((offset + 1) | 1) << log2_denom; \
+    for(y=0; y<H; y++, dst += stride, src += stride){ \
+        op_scale2(0); \
+        op_scale2(1); \
+        if(W==2) continue; \
+        op_scale2(2); \
+        op_scale2(3); \
+        if(W==4) continue; \
+        op_scale2(4); \
+        op_scale2(5); \
+        op_scale2(6); \
+        op_scale2(7); \
+        if(W==8) continue; \
+        op_scale2(8); \
+        op_scale2(9); \
+        op_scale2(10); \
+        op_scale2(11); \
+        op_scale2(12); \
+        op_scale2(13); \
+        op_scale2(14); \
+        op_scale2(15); \
+    } \
+}
+H264_WEIGHT(16,16)
+H264_WEIGHT(16,8)
+H264_WEIGHT(8,16)
+H264_WEIGHT(8,8)
+H264_WEIGHT(8,4)
+H264_WEIGHT(4,8)
+H264_WEIGHT(4,4)
+H264_WEIGHT(4,2)
+H264_WEIGHT(2,4)
+H264_WEIGHT(2,2)
+#undef op_scale1
+#undef op_scale2
+#undef H264_WEIGHT
+static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+    int i;
+    for(i=0; i<h; i++){
+        dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
+        dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
+        dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
+        dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
+        dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
+        dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
+        dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
+        dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
+        dst+=dstStride;
+        src+=srcStride;
+    }
+}
+#if CONFIG_CAVS_DECODER
+/* AVS specific */
+void ff_cavsdsp_init(DSPContext* c, AVCodecContext *avctx);
+void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
+    put_pixels8_c(dst, src, stride, 8);
+}
+void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
+    avg_pixels8_c(dst, src, stride, 8);
+}
+void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
+    put_pixels16_c(dst, src, stride, 16);
+}
+void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
+    avg_pixels16_c(dst, src, stride, 16);
+}
+#endif /* CONFIG_CAVS_DECODER */
+#if CONFIG_VC1_DECODER || CONFIG_WMV3_DECODER
+/* VC-1 specific */
+void ff_vc1dsp_init(DSPContext* c, AVCodecContext *avctx);
+void ff_put_vc1_mspel_mc00_c(uint8_t *dst, uint8_t *src, int stride, int rnd) {
+    put_pixels8_c(dst, src, stride, 8);
+}
+#endif /* CONFIG_VC1_DECODER||CONFIG_WMV3_DECODER */
+void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx);
+/* H264 specific */
+void ff_h264dspenc_init(DSPContext* c, AVCodecContext *avctx);
+#if CONFIG_RV30_DECODER
+void ff_rv30dsp_init(DSPContext* c, AVCodecContext *avctx);
+#endif /* CONFIG_RV30_DECODER */
+#if CONFIG_RV40_DECODER
+static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
+    put_pixels16_xy2_c(dst, src, stride, 16);
+}
+static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
+    avg_pixels16_xy2_c(dst, src, stride, 16);
+}
+static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
+    put_pixels8_xy2_c(dst, src, stride, 8);
+}
+static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
+    avg_pixels8_xy2_c(dst, src, stride, 8);
+}
+void ff_rv40dsp_init(DSPContext* c, AVCodecContext *avctx);
+#endif /* CONFIG_RV40_DECODER */
+static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+    int i;
+    for(i=0; i<w; i++){
+        const int src_1= src[ -srcStride];
+        const int src0 = src[0          ];
+        const int src1 = src[  srcStride];
+        const int src2 = src[2*srcStride];
+        const int src3 = src[3*srcStride];
+        const int src4 = src[4*srcStride];
+        const int src5 = src[5*srcStride];
+        const int src6 = src[6*srcStride];
+        const int src7 = src[7*srcStride];
+        const int src8 = src[8*srcStride];
+        const int src9 = src[9*srcStride];
+        dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
+        dst[1*dstStride]= cm[(9*(src1 + src2) - (src0  + src3) + 8)>>4];
+        dst[2*dstStride]= cm[(9*(src2 + src3) - (src1  + src4) + 8)>>4];
+        dst[3*dstStride]= cm[(9*(src3 + src4) - (src2  + src5) + 8)>>4];
+        dst[4*dstStride]= cm[(9*(src4 + src5) - (src3  + src6) + 8)>>4];
+        dst[5*dstStride]= cm[(9*(src5 + src6) - (src4  + src7) + 8)>>4];
+        dst[6*dstStride]= cm[(9*(src6 + src7) - (src5  + src8) + 8)>>4];
+        dst[7*dstStride]= cm[(9*(src7 + src8) - (src6  + src9) + 8)>>4];
+        src++;
+        dst++;
+    }
+}
+static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
+    put_pixels8_c(dst, src, stride, 8);
+}
+static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
+    uint8_t half[64];
+    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
+    put_pixels8_l2(dst, src, half, stride, stride, 8, 8);
+}
+static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
+    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
+}
+static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
+    uint8_t half[64];
+    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
+    put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8);
+}
+static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
+    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
+}
+static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
+    uint8_t halfH[88];
+    uint8_t halfV[64];
+    uint8_t halfHV[64];
+    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
+    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
+    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
+    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
+}
+static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
+    uint8_t halfH[88];
+    uint8_t halfV[64];
+    uint8_t halfHV[64];
+    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
+    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
+    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
+    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
+}
+static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
+    uint8_t halfH[88];
+    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
+    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
+}
+static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
+    if(CONFIG_ANY_H263) {
+    int x;
+    const int strength= ff_h263_loop_filter_strength[qscale];
+    for(x=0; x<8; x++){
+        int d1, d2, ad1;
+        int p0= src[x-2*stride];
+        int p1= src[x-1*stride];
+        int p2= src[x+0*stride];
+        int p3= src[x+1*stride];
+        int d = (p0 - p3 + 4*(p2 - p1)) / 8;
+        if     (d<-2*strength) d1= 0;
+        else if(d<-  strength) d1=-2*strength - d;
+        else if(d<   strength) d1= d;
+        else if(d< 2*strength) d1= 2*strength - d;
+        else                   d1= 0;
+        p1 += d1;
+        p2 -= d1;
+        if(p1&256) p1= ~(p1>>31);
+        if(p2&256) p2= ~(p2>>31);
+        src[x-1*stride] = p1;
+        src[x+0*stride] = p2;
+        ad1= FFABS(d1)>>1;
+        d2= av_clip((p0-p3)/4, -ad1, ad1);
+        src[x-2*stride] = p0 - d2;
+        src[x+  stride] = p3 + d2;
+    }
+    }
+}
+static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
+    if(CONFIG_ANY_H263) {
+    int y;
+    const int strength= ff_h263_loop_filter_strength[qscale];
+    for(y=0; y<8; y++){
+        int d1, d2, ad1;
+        int p0= src[y*stride-2];
+        int p1= src[y*stride-1];
+        int p2= src[y*stride+0];
+        int p3= src[y*stride+1];
+        int d = (p0 - p3 + 4*(p2 - p1)) / 8;
+        if     (d<-2*strength) d1= 0;
+        else if(d<-  strength) d1=-2*strength - d;
+        else if(d<   strength) d1= d;
+        else if(d< 2*strength) d1= 2*strength - d;
+        else                   d1= 0;
+        p1 += d1;
+        p2 -= d1;
+        if(p1&256) p1= ~(p1>>31);
+        if(p2&256) p2= ~(p2>>31);
+        src[y*stride-1] = p1;
+        src[y*stride+0] = p2;
+        ad1= FFABS(d1)>>1;
+        d2= av_clip((p0-p3)/4, -ad1, ad1);
+        src[y*stride-2] = p0 - d2;
+        src[y*stride+1] = p3 + d2;
+    }
+    }
+}
+static void h261_loop_filter_c(uint8_t *src, int stride){
+    int x,y,xy,yz;
+    int temp[64];
+    for(x=0; x<8; x++){
+        temp[x      ] = 4*src[x           ];
+        temp[x + 7*8] = 4*src[x + 7*stride];
+    }
+    for(y=1; y<7; y++){
+        for(x=0; x<8; x++){
+            xy = y * stride + x;
+            yz = y * 8 + x;
+            temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride];
+        }
+    }
+    for(y=0; y<8; y++){
+        src[  y*stride] = (temp[  y*8] + 2)>>2;
+        src[7+y*stride] = (temp[7+y*8] + 2)>>2;
+        for(x=1; x<7; x++){
+            xy = y * stride + x;
+            yz = y * 8 + x;
+            src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
+        }
+    }
+}
+static inline void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
+{
+    int i, d;
+    for( i = 0; i < 4; i++ ) {
+        if( tc0[i] < 0 ) {
+            pix += 4*ystride;
+            continue;
+        }
+        for( d = 0; d < 4; d++ ) {
+            const int p0 = pix[-1*xstride];
+            const int p1 = pix[-2*xstride];
+            const int p2 = pix[-3*xstride];
+            const int q0 = pix[0];
+            const int q1 = pix[1*xstride];
+            const int q2 = pix[2*xstride];
+            if( FFABS( p0 - q0 ) < alpha &&
+                FFABS( p1 - p0 ) < beta &&
+                FFABS( q1 - q0 ) < beta ) {
+                int tc = tc0[i];
+                int i_delta;
+                if( FFABS( p2 - p0 ) < beta ) {
+                    pix[-2*xstride] = p1 + av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] );
+                    tc++;
+                }
+                if( FFABS( q2 - q0 ) < beta ) {
+                    pix[   xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] );
+                    tc++;
+                }
+                i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
+                pix[-xstride] = av_clip_uint8( p0 + i_delta );    /* p0' */
+                pix[0]        = av_clip_uint8( q0 - i_delta );    /* q0' */
+            }
+            pix += ystride;
+        }
+    }
+}
+static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+{
+    h264_loop_filter_luma_c(pix, stride, 1, alpha, beta, tc0);
+}
+static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+{
+    h264_loop_filter_luma_c(pix, 1, stride, alpha, beta, tc0);
+}
+static inline void h264_loop_filter_luma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
+{
+    int d;
+    for( d = 0; d < 16; d++ ) {
+        const int p2 = pix[-3*xstride];
+        const int p1 = pix[-2*xstride];
+        const int p0 = pix[-1*xstride];
+        const int q0 = pix[ 0*xstride];
+        const int q1 = pix[ 1*xstride];
+        const int q2 = pix[ 2*xstride];
+        if( FFABS( p0 - q0 ) < alpha &&
+            FFABS( p1 - p0 ) < beta &&
+            FFABS( q1 - q0 ) < beta ) {
+            if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
+                if( FFABS( p2 - p0 ) < beta)
+                {
+                    const int p3 = pix[-4*xstride];
+                    /* p0', p1', p2' */
+                    pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
+                    pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
+                    pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
+                } else {
+                    /* p0' */
+                    pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
+                }
+                if( FFABS( q2 - q0 ) < beta)
+                {
+                    const int q3 = pix[3*xstride];
+                    /* q0', q1', q2' */
+                    pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
+                    pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
+                    pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
+                } else {
+                    /* q0' */
+                    pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
+                }
+            }else{
+                /* p0', q0' */
+                pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
+                pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
+            }
+        }
+        pix += ystride;
+    }
+}
+static void h264_v_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
+{
+    h264_loop_filter_luma_intra_c(pix, stride, 1, alpha, beta);
+}
+static void h264_h_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
+{
+    h264_loop_filter_luma_intra_c(pix, 1, stride, alpha, beta);
+}
+static inline void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
+{
+    int i, d;
+    for( i = 0; i < 4; i++ ) {
+        const int tc = tc0[i];
+        if( tc <= 0 ) {
+            pix += 2*ystride;
+            continue;
+        }
+        for( d = 0; d < 2; d++ ) {
+            const int p0 = pix[-1*xstride];
+            const int p1 = pix[-2*xstride];
+            const int q0 = pix[0];
+            const int q1 = pix[1*xstride];
+            if( FFABS( p0 - q0 ) < alpha &&
+                FFABS( p1 - p0 ) < beta &&
+                FFABS( q1 - q0 ) < beta ) {
+                int delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
+                pix[-xstride] = av_clip_uint8( p0 + delta );    /* p0' */
+                pix[0]        = av_clip_uint8( q0 - delta );    /* q0' */
+            }
+            pix += ystride;
+        }
+    }
+}
+static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+{
+    h264_loop_filter_chroma_c(pix, stride, 1, alpha, beta, tc0);
+}
+static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+{
+    h264_loop_filter_chroma_c(pix, 1, stride, alpha, beta, tc0);
+}
+static inline void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
+{
+    int d;
+    for( d = 0; d < 8; d++ ) {
+        const int p0 = pix[-1*xstride];
+        const int p1 = pix[-2*xstride];
+        const int q0 = pix[0];
+        const int q1 = pix[1*xstride];
+        if( FFABS( p0 - q0 ) < alpha &&
+            FFABS( p1 - p0 ) < beta &&
+            FFABS( q1 - q0 ) < beta ) {
+            pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
+            pix[0]        = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
+        }
+        pix += ystride;
+    }
+}
+static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
+{
+    h264_loop_filter_chroma_intra_c(pix, stride, 1, alpha, beta);
+}
+static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
+{
+    h264_loop_filter_chroma_intra_c(pix, 1, stride, alpha, beta);
+}
+static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int s, i;
+    s = 0;
+    for(i=0;i<h;i++) {
+        s += abs(pix1[0] - pix2[0]);
+        s += abs(pix1[1] - pix2[1]);
+        s += abs(pix1[2] - pix2[2]);
+        s += abs(pix1[3] - pix2[3]);
+        s += abs(pix1[4] - pix2[4]);
+        s += abs(pix1[5] - pix2[5]);
+        s += abs(pix1[6] - pix2[6]);
+        s += abs(pix1[7] - pix2[7]);
+        s += abs(pix1[8] - pix2[8]);
+        s += abs(pix1[9] - pix2[9]);
+        s += abs(pix1[10] - pix2[10]);
+        s += abs(pix1[11] - pix2[11]);
+        s += abs(pix1[12] - pix2[12]);
+        s += abs(pix1[13] - pix2[13]);
+        s += abs(pix1[14] - pix2[14]);
+        s += abs(pix1[15] - pix2[15]);
+        pix1 += line_size;
+        pix2 += line_size;
+    }
+    return s;
+}
+static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int s, i;
+    s = 0;
+    for(i=0;i<h;i++) {
+        s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
+        s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
+        s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
+        s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
+        s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
+        s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
+        s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
+        s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
+        s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
+        s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
+        s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
+        s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
+        s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
+        s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
+        s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
+        s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
+        pix1 += line_size;
+        pix2 += line_size;
+    }
+    return s;
+}
+static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int s, i;
+    uint8_t *pix3 = pix2 + line_size;
+    s = 0;
+    for(i=0;i<h;i++) {
+        s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
+        s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
+        s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
+        s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
+        s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
+        s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
+        s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
+        s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
+        s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
+        s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
+        s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
+        s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
+        s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
+        s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
+        s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
+        s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
+        pix1 += line_size;
+        pix2 += line_size;
+        pix3 += line_size;
+    }
+    return s;
+}
+static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int s, i;
+    uint8_t *pix3 = pix2 + line_size;
+    s = 0;
+    for(i=0;i<h;i++) {
+        s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
+        s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
+        s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
+        s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
+        s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
+        s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
+        s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
+        s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
+        s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
+        s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
+        s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
+        s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
+        s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
+        s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
+        s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
+        s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
+        pix1 += line_size;
+        pix2 += line_size;
+        pix3 += line_size;
+    }
+    return s;
+}
+static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int s, i;
+    s = 0;
+    for(i=0;i<h;i++) {
+        s += abs(pix1[0] - pix2[0]);
+        s += abs(pix1[1] - pix2[1]);
+        s += abs(pix1[2] - pix2[2]);
+        s += abs(pix1[3] - pix2[3]);
+        s += abs(pix1[4] - pix2[4]);
+        s += abs(pix1[5] - pix2[5]);
+        s += abs(pix1[6] - pix2[6]);
+        s += abs(pix1[7] - pix2[7]);
+        pix1 += line_size;
+        pix2 += line_size;
+    }
+    return s;
+}
+static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int s, i;
+    s = 0;
+    for(i=0;i<h;i++) {
+        s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
+        s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
+        s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
+        s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
+        s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
+        s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
+        s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
+        s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
+        pix1 += line_size;
+        pix2 += line_size;
+    }
+    return s;
+}
+static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int s, i;
+    uint8_t *pix3 = pix2 + line_size;
+    s = 0;
+    for(i=0;i<h;i++) {
+        s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
+        s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
+        s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
+        s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
+        s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
+        s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
+        s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
+        s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
+        pix1 += line_size;
+        pix2 += line_size;
+        pix3 += line_size;
+    }
+    return s;
+}
+static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int s, i;
+    uint8_t *pix3 = pix2 + line_size;
+    s = 0;
+    for(i=0;i<h;i++) {
+        s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
+        s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
+        s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
+        s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
+        s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
+        s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
+        s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
+        s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
+        pix1 += line_size;
+        pix2 += line_size;
+        pix3 += line_size;
+    }
+    return s;
+}
+static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
+    MpegEncContext *c = v;
+    int score1=0;
+    int score2=0;
+    int x,y;
+    for(y=0; y<h; y++){
+        for(x=0; x<16; x++){
+            score1+= (s1[x  ] - s2[x ])*(s1[x  ] - s2[x ]);
+        }
+        if(y+1<h){
+            for(x=0; x<15; x++){
+                score2+= FFABS(  s1[x  ] - s1[x  +stride]
+                             - s1[x+1] + s1[x+1+stride])
+                        -FFABS(  s2[x  ] - s2[x  +stride]
+                             - s2[x+1] + s2[x+1+stride]);
+            }
+        }
+        s1+= stride;
+        s2+= stride;
+    }
+    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
+    else  return score1 + FFABS(score2)*8;
+}
+static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
+    MpegEncContext *c = v;
+    int score1=0;
+    int score2=0;
+    int x,y;
+    for(y=0; y<h; y++){
+        for(x=0; x<8; x++){
+            score1+= (s1[x  ] - s2[x ])*(s1[x  ] - s2[x ]);
+        }
+        if(y+1<h){
+            for(x=0; x<7; x++){
+                score2+= FFABS(  s1[x  ] - s1[x  +stride]
+                             - s1[x+1] + s1[x+1+stride])
+                        -FFABS(  s2[x  ] - s2[x  +stride]
+                             - s2[x+1] + s2[x+1+stride]);
+            }
+        }
+        s1+= stride;
+        s2+= stride;
+    }
+    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
+    else  return score1 + FFABS(score2)*8;
+}
+static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
+    int i;
+    unsigned int sum=0;
+    for(i=0; i<8*8; i++){
+        int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
+        int w= weight[i];
+        b>>= RECON_SHIFT;
+        assert(-512<b && b<512);
+        sum += (w*b)*(w*b)>>4;
+    }
+    return sum>>2;
+}
+static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
+    int i;
+    for(i=0; i<8*8; i++){
+        rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
+    }
+}
+/**
+ * permutes an 8x8 block.
+ * @param block the block which will be permuted according to the given permutation vector
+ * @param permutation the permutation vector
+ * @param last the last non zero coefficient in scantable order, used to speed the permutation up
+ * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
+ *                  (inverse) permutated to scantable order!
+ */
+void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
+{
+    int i;
+    DCTELEM temp[64];
+    if(last<=0) return;
+    //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations
+    for(i=0; i<=last; i++){
+        const int j= scantable[i];
+        temp[j]= block[j];
+        block[j]=0;
+    }
+    for(i=0; i<=last; i++){
+        const int j= scantable[i];
+        const int perm_j= permutation[j];
+        block[perm_j]= temp[j];
+    }
+}
+static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
+    return 0;
+}
+void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
+    int i;
+    memset(cmp, 0, sizeof(void*)*6);
+    for(i=0; i<6; i++){
+        switch(type&0xFF){
+        case FF_CMP_SAD:
+            cmp[i]= c->sad[i];
+            break;
+        case FF_CMP_SATD:
+            cmp[i]= c->hadamard8_diff[i];
+            break;
+        case FF_CMP_SSE:
+            cmp[i]= c->sse[i];
+            break;
+        case FF_CMP_DCT:
+            cmp[i]= c->dct_sad[i];
+            break;
+        case FF_CMP_DCT264:
+            cmp[i]= c->dct264_sad[i];
+            break;
+        case FF_CMP_DCTMAX:
+            cmp[i]= c->dct_max[i];
+            break;
+        case FF_CMP_PSNR:
+            cmp[i]= c->quant_psnr[i];
+            break;
+        case FF_CMP_BIT:
+            cmp[i]= c->bit[i];
+            break;
+        case FF_CMP_RD:
+            cmp[i]= c->rd[i];
+            break;
+        case FF_CMP_VSAD:
+            cmp[i]= c->vsad[i];
+            break;
+        case FF_CMP_VSSE:
+            cmp[i]= c->vsse[i];
+            break;
+        case FF_CMP_ZERO:
+            cmp[i]= zero_cmp;
+            break;
+        case FF_CMP_NSSE:
+            cmp[i]= c->nsse[i];
+            break;
+#if CONFIG_SNOW_ENCODER
+        case FF_CMP_W53:
+            cmp[i]= c->w53[i];
+            break;
+        case FF_CMP_W97:
+            cmp[i]= c->w97[i];
+            break;
+#endif
+        default:
+            av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
+        }
+    }
+}
+static void clear_block_c(DCTELEM *block)
+{
+    memset(block, 0, sizeof(DCTELEM)*64);
+}
+/**
+ * memset(blocks, 0, sizeof(DCTELEM)*6*64)
+ */
+static void clear_blocks_c(DCTELEM *blocks)
+{
+    memset(blocks, 0, sizeof(DCTELEM)*6*64);
+}
+static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
+    long i;
+    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
+        long a = *(long*)(src+i);
+        long b = *(long*)(dst+i);
+        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
+    }
+    for(; i<w; i++)
+        dst[i+0] += src[i+0];
+}
+static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
+    long i;
+    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
+        long a = *(long*)(src1+i);
+        long b = *(long*)(src2+i);
+        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
+    }
+    for(; i<w; i++)
+        dst[i] = src1[i]+src2[i];
+}
+static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
+    long i;
+#if !HAVE_FAST_UNALIGNED
+    if((long)src2 & (sizeof(long)-1)){
+        for(i=0; i+7<w; i+=8){
+            dst[i+0] = src1[i+0]-src2[i+0];
+            dst[i+1] = src1[i+1]-src2[i+1];
+            dst[i+2] = src1[i+2]-src2[i+2];
+            dst[i+3] = src1[i+3]-src2[i+3];
+            dst[i+4] = src1[i+4]-src2[i+4];
+            dst[i+5] = src1[i+5]-src2[i+5];
+            dst[i+6] = src1[i+6]-src2[i+6];
+            dst[i+7] = src1[i+7]-src2[i+7];
+        }
+    }else
+#endif
+    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
+        long a = *(long*)(src1+i);
+        long b = *(long*)(src2+i);
+        *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
+    }
+    for(; i<w; i++)
+        dst[i+0] = src1[i+0]-src2[i+0];
+}
+static void add_hfyu_median_prediction_c(uint8_t *dst, uint8_t *src1, uint8_t *diff, int w, int *left, int *left_top){
+    int i;
+    uint8_t l, lt;
+    l= *left;
+    lt= *left_top;
+    for(i=0; i<w; i++){
+        l= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
+        lt= src1[i];
+        dst[i]= l;
+    }
+    *left= l;
+    *left_top= lt;
+}
+static void sub_hfyu_median_prediction_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
+    int i;
+    uint8_t l, lt;
+    l= *left;
+    lt= *left_top;
+    for(i=0; i<w; i++){
+        const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
+        lt= src1[i];
+        l= src2[i];
+        dst[i]= l - pred;
+    }
+    *left= l;
+    *left_top= lt;
+}
+#define BUTTERFLY2(o1,o2,i1,i2) \
+o1= (i1)+(i2);\
+o2= (i1)-(i2);
+#define BUTTERFLY1(x,y) \
+{\
+    int a,b;\
+    a= x;\
+    b= y;\
+    x= a+b;\
+    y= a-b;\
+}
+#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
+static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
+    int i;
+    int temp[64];
+    int sum=0;
+    assert(h==8);
+    for(i=0; i<8; i++){
+        //FIXME try pointer walks
+        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
+        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
+        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
+        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);
+        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
+        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
+        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
+        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
+        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
+        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
+        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
+        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
+    }
+    for(i=0; i<8; i++){
+        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
+        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
+        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
+        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
+        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
+        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
+        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
+        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
+        sum +=
+             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
+            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
+            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
+            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
+    }
+#if 0
+static int maxi=0;
+if(sum>maxi){
+    maxi=sum;
+    printf("MAX:%d\n", maxi);
+}
+#endif
+    return sum;
+}
+static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
+    int i;
+    int temp[64];
+    int sum=0;
+    assert(h==8);
+    for(i=0; i<8; i++){
+        //FIXME try pointer walks
+        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
+        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
+        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
+        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);
+        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
+        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
+        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
+        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
+        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
+        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
+        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
+        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
+    }
+    for(i=0; i<8; i++){
+        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
+        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
+        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
+        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
+        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
+        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
+        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
+        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
+        sum +=
+             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
+            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
+            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
+            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
+    }
+    sum -= FFABS(temp[8*0] + temp[8*4]); // -mean
+    return sum;
+}
+static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
+    MpegEncContext * const s= (MpegEncContext *)c;
+    DECLARE_ALIGNED_16(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
+    DCTELEM * const temp= (DCTELEM*)aligned_temp;
+    assert(h==8);
+    s->dsp.diff_pixels(temp, src1, src2, stride);
+    s->dsp.fdct(temp);
+    return s->dsp.sum_abs_dctelem(temp);
+}
+#if CONFIG_GPL
+#define DCT8_1D {\
+    const int s07 = SRC(0) + SRC(7);\
+    const int s16 = SRC(1) + SRC(6);\
+    const int s25 = SRC(2) + SRC(5);\
+    const int s34 = SRC(3) + SRC(4);\
+    const int a0 = s07 + s34;\
+    const int a1 = s16 + s25;\
+    const int a2 = s07 - s34;\
+    const int a3 = s16 - s25;\
+    const int d07 = SRC(0) - SRC(7);\
+    const int d16 = SRC(1) - SRC(6);\
+    const int d25 = SRC(2) - SRC(5);\
+    const int d34 = SRC(3) - SRC(4);\
+    const int a4 = d16 + d25 + (d07 + (d07>>1));\
+    const int a5 = d07 - d34 - (d25 + (d25>>1));\
+    const int a6 = d07 + d34 - (d16 + (d16>>1));\
+    const int a7 = d16 - d25 + (d34 + (d34>>1));\
+    DST(0,  a0 + a1     ) ;\
+    DST(1,  a4 + (a7>>2)) ;\
+    DST(2,  a2 + (a3>>1)) ;\
+    DST(3,  a5 + (a6>>2)) ;\
+    DST(4,  a0 - a1     ) ;\
+    DST(5,  a6 - (a5>>2)) ;\
+    DST(6, (a2>>1) - a3 ) ;\
+    DST(7, (a4>>2) - a7 ) ;\
+}
+static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
+    MpegEncContext * const s= (MpegEncContext *)c;
+    DCTELEM dct[8][8];
+    int i;
+    int sum=0;
+    s->dsp.diff_pixels(dct[0], src1, src2, stride);
+#define SRC(x) dct[i][x]
+#define DST(x,v) dct[i][x]= v
+    for( i = 0; i < 8; i++ )
+        DCT8_1D
+#undef SRC
+#undef DST
+#define SRC(x) dct[x][i]
+#define DST(x,v) sum += FFABS(v)
+    for( i = 0; i < 8; i++ )
+        DCT8_1D
+#undef SRC
+#undef DST
+    return sum;
+}
+#endif
+static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
+    MpegEncContext * const s= (MpegEncContext *)c;
+    DECLARE_ALIGNED_8(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
+    DCTELEM * const temp= (DCTELEM*)aligned_temp;
+    int sum=0, i;
+    assert(h==8);
+    s->dsp.diff_pixels(temp, src1, src2, stride);
+    s->dsp.fdct(temp);
+    for(i=0; i<64; i++)
+        sum= FFMAX(sum, FFABS(temp[i]));
+    return sum;
+}
+static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
+    MpegEncContext * const s= (MpegEncContext *)c;
+    DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64*2/8]);
+    DCTELEM * const temp= (DCTELEM*)aligned_temp;
+    DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64;
+    int sum=0, i;
+    assert(h==8);
+    s->mb_intra=0;
+    s->dsp.diff_pixels(temp, src1, src2, stride);
+    memcpy(bak, temp, 64*sizeof(DCTELEM));
+    s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
+    s->dct_unquantize_inter(s, temp, 0, s->qscale);
+    ff_simple_idct(temp); //FIXME
+    for(i=0; i<64; i++)
+        sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
+    return sum;
+}
+static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
+    MpegEncContext * const s= (MpegEncContext *)c;
+    const uint8_t *scantable= s->intra_scantable.permutated;
+    DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
+    DECLARE_ALIGNED_8 (uint64_t, aligned_bak[stride]);
+    DCTELEM * const temp= (DCTELEM*)aligned_temp;
+    uint8_t * const bak= (uint8_t*)aligned_bak;
+    int i, last, run, bits, level, distortion, start_i;
+    const int esc_length= s->ac_esc_length;
+    uint8_t * length;
+    uint8_t * last_length;
+    assert(h==8);
+    for(i=0; i<8; i++){
+        ((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0];
+        ((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1];
+    }
+    s->dsp.diff_pixels(temp, src1, src2, stride);
+    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
+    bits=0;
+    if (s->mb_intra) {
+        start_i = 1;
+        length     = s->intra_ac_vlc_length;
+        last_length= s->intra_ac_vlc_last_length;
+        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
+    } else {
+        start_i = 0;
+        length     = s->inter_ac_vlc_length;
+        last_length= s->inter_ac_vlc_last_length;
+    }
+    if(last>=start_i){
+        run=0;
+        for(i=start_i; i<last; i++){
+            int j= scantable[i];
+            level= temp[j];
+            if(level){
+                level+=64;
+                if((level&(~127)) == 0){
+                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
+                }else
+                    bits+= esc_length;
+                run=0;
+            }else
+                run++;
+        }
+        i= scantable[last];
+        level= temp[i] + 64;
+        assert(level - 64);
+        if((level&(~127)) == 0){
+            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
+        }else
+            bits+= esc_length;
+    }
+    if(last>=0){
+        if(s->mb_intra)
+            s->dct_unquantize_intra(s, temp, 0, s->qscale);
+        else
+            s->dct_unquantize_inter(s, temp, 0, s->qscale);
+    }
+    s->dsp.idct_add(bak, stride, temp);
+    distortion= s->dsp.sse[1](NULL, bak, src1, stride, 8);
+    return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
+}
+static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
+    MpegEncContext * const s= (MpegEncContext *)c;
+    const uint8_t *scantable= s->intra_scantable.permutated;
+    DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
+    DCTELEM * const temp= (DCTELEM*)aligned_temp;
+    int i, last, run, bits, level, start_i;
+    const int esc_length= s->ac_esc_length;
+    uint8_t * length;
+    uint8_t * last_length;
+    assert(h==8);
+    s->dsp.diff_pixels(temp, src1, src2, stride);
+    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
+    bits=0;
+    if (s->mb_intra) {
+        start_i = 1;
+        length     = s->intra_ac_vlc_length;
+        last_length= s->intra_ac_vlc_last_length;
+        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
+    } else {
+        start_i = 0;
+        length     = s->inter_ac_vlc_length;
+        last_length= s->inter_ac_vlc_last_length;
+    }
+    if(last>=start_i){
+        run=0;
+        for(i=start_i; i<last; i++){
+            int j= scantable[i];
+            level= temp[j];
+            if(level){
+                level+=64;
+                if((level&(~127)) == 0){
+                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
+                }else
+                    bits+= esc_length;
+                run=0;
+            }else
+                run++;
+        }
+        i= scantable[last];
+        level= temp[i] + 64;
+        assert(level - 64);
+        if((level&(~127)) == 0){
+            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
+        }else
+            bits+= esc_length;
+    }
+    return bits;
+}
+#define VSAD_INTRA(size) \
+static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
+    int score=0;                                                                                            \
+    int x,y;                                                                                                \
+                                                                                                            \
+    for(y=1; y<h; y++){                                                                                     \
+        for(x=0; x<size; x+=4){                                                                             \
+            score+= FFABS(s[x  ] - s[x  +stride]) + FFABS(s[x+1] - s[x+1+stride])                           \
+                   +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]);                          \
+        }                                                                                                   \
+        s+= stride;                                                                                         \
+    }                                                                                                       \
+                                                                                                            \
+    return score;                                                                                           \
+}
+VSAD_INTRA(8)
+VSAD_INTRA(16)
+static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
+    int score=0;
+    int x,y;
+    for(y=1; y<h; y++){
+        for(x=0; x<16; x++){
+            score+= FFABS(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
+        }
+        s1+= stride;
+        s2+= stride;
+    }
+    return score;
+}
+#define SQ(a) ((a)*(a))
+#define VSSE_INTRA(size) \
+static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
+    int score=0;                                                                                            \
+    int x,y;                                                                                                \
+                                                                                                            \
+    for(y=1; y<h; y++){                                                                                     \
+        for(x=0; x<size; x+=4){                                                                               \
+            score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride])                                 \
+                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);                                \
+        }                                                                                                   \
+        s+= stride;                                                                                         \
+    }                                                                                                       \
+                                                                                                            \
+    return score;                                                                                           \
+}
+VSSE_INTRA(8)
+VSSE_INTRA(16)
+static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
+    int score=0;
+    int x,y;
+    for(y=1; y<h; y++){
+        for(x=0; x<16; x++){
+            score+= SQ(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
+        }
+        s1+= stride;
+        s2+= stride;
+    }
+    return score;
+}
+static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
+                               int size){
+    int score=0;
+    int i;
+    for(i=0; i<size; i++)
+        score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);
+    return score;
+}
+WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
+WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
+WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
+#if CONFIG_GPL
+WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
+#endif
+WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
+WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
+WRAPPER8_16_SQ(rd8x8_c, rd16_c)
+WRAPPER8_16_SQ(bit8x8_c, bit16_c)
+#endif
+static void vector_fmul_c(float *dst, const float *src, int len){
+    int i;
+    for(i=0; i<len; i++)
+        dst[i] *= src[i];
+}
+static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
+    int i;
+    src1 += len-1;
+    for(i=0; i<len; i++)
+        dst[i] = src0[i] * src1[-i];
+}
+void ff_vector_fmul_add_add_c(float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step){
+    int i;
+    for(i=0; i<len; i++)
+        dst[i*step] = src0[i] * src1[i] + src2[i] + src3;
+}
+void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len){
+    int i,j;
+    dst += len;
+    win += len;
+    src0+= len;
+    for(i=-len, j=len-1; i<0; i++, j--) {
+        float s0 = src0[i];
+        float s1 = src1[j];
+        float wi = win[i];
+        float wj = win[j];
+        dst[i] = s0*wj - s1*wi + add_bias;
+        dst[j] = s0*wi + s1*wj + add_bias;
+    }
+}
+#if 0
+static void int32_to_float_fmul_scalar_c(float *dst, const int *src, float mul, int len){
+    int i;
+    for(i=0; i<len; i++)
+        dst[i] = src[i] * mul;
+}
+static av_always_inline int float_to_int16_one(const float *src){
+    int_fast32_t tmp = *(const int32_t*)src;
+    if(tmp & 0xf0000){
+        tmp = (0x43c0ffff - tmp)>>31;
+        // is this faster on some gcc/cpu combinations?
+//      if(tmp > 0x43c0ffff) tmp = 0xFFFF;
+//      else                 tmp = 0;
+    }
+    return tmp - 0x8000;
+}
+void ff_float_to_int16_c(int16_t *dst, const float *src, long len){
+    int i;
+    for(i=0; i<len; i++)
+        dst[i] = float_to_int16_one(src+i);
+}
+void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, int channels){
+    int i,j,c;
+    if(channels==2){
+        for(i=0; i<len; i++){
+            dst[2*i]   = float_to_int16_one(src[0]+i);
+            dst[2*i+1] = float_to_int16_one(src[1]+i);
+        }
+    }else{
+        for(c=0; c<channels; c++)
+            for(i=0, j=c; i<len; i++, j+=channels)
+                dst[j] = float_to_int16_one(src[c]+i);
+    }
+}
+static void add_int16_c(int16_t * v1, int16_t * v2, int order)
+{
+    while (order--)
+       *v1++ += *v2++;
+}
+static void sub_int16_c(int16_t * v1, int16_t * v2, int order)
+{
+    while (order--)
+        *v1++ -= *v2++;
+}
+static int32_t scalarproduct_int16_c(int16_t * v1, int16_t * v2, int order, int shift)
+{
+    int res = 0;
+    while (order--)
+        res += (*v1++ * *v2++) >> shift;
+    return res;
+}
+#define W0 2048
+#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
+#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
+#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
+#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
+#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
+#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
+#define W7 565  /* 2048*sqrt (2)*cos (7*pi/16) */
+static void wmv2_idct_row(short * b)
+{
+    int s1,s2;
+    int a0,a1,a2,a3,a4,a5,a6,a7;
+    /*step 1*/
+    a1 = W1*b[1]+W7*b[7];
+    a7 = W7*b[1]-W1*b[7];
+    a5 = W5*b[5]+W3*b[3];
+    a3 = W3*b[5]-W5*b[3];
+    a2 = W2*b[2]+W6*b[6];
+    a6 = W6*b[2]-W2*b[6];
+    a0 = W0*b[0]+W0*b[4];
+    a4 = W0*b[0]-W0*b[4];
+    /*step 2*/
+    s1 = (181*(a1-a5+a7-a3)+128)>>8;//1,3,5,7,
+    s2 = (181*(a1-a5-a7+a3)+128)>>8;
+    /*step 3*/
+    b[0] = (a0+a2+a1+a5 + (1<<7))>>8;
+    b[1] = (a4+a6 +s1   + (1<<7))>>8;
+    b[2] = (a4-a6 +s2   + (1<<7))>>8;
+    b[3] = (a0-a2+a7+a3 + (1<<7))>>8;
+    b[4] = (a0-a2-a7-a3 + (1<<7))>>8;
+    b[5] = (a4-a6 -s2   + (1<<7))>>8;
+    b[6] = (a4+a6 -s1   + (1<<7))>>8;
+    b[7] = (a0+a2-a1-a5 + (1<<7))>>8;
+}
+static void wmv2_idct_col(short * b)
+{
+    int s1,s2;
+    int a0,a1,a2,a3,a4,a5,a6,a7;
+    /*step 1, with extended precision*/
+    a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
+    a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
+    a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
+    a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
+    a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
+    a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
+    a0 = (W0*b[8*0]+W0*b[8*4]    )>>3;
+    a4 = (W0*b[8*0]-W0*b[8*4]    )>>3;
+    /*step 2*/
+    s1 = (181*(a1-a5+a7-a3)+128)>>8;
+    s2 = (181*(a1-a5-a7+a3)+128)>>8;
+    /*step 3*/
+    b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
+    b[8*1] = (a4+a6 +s1   + (1<<13))>>14;
+    b[8*2] = (a4-a6 +s2   + (1<<13))>>14;
+    b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;
+    b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
+    b[8*5] = (a4-a6 -s2   + (1<<13))>>14;
+    b[8*6] = (a4+a6 -s1   + (1<<13))>>14;
+    b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
+}
+void ff_wmv2_idct_c(short * block){
+    int i;
+    for(i=0;i<64;i+=8){
+        wmv2_idct_row(block+i);
+    }
+    for(i=0;i<8;i++){
+        wmv2_idct_col(block+i);
+    }
+}
+/* XXX: those functions should be suppressed ASAP when all IDCTs are
+ converted */
+static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    ff_wmv2_idct_c(block);
+    put_pixels_clamped_c(block, dest, line_size);
+}
+static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    ff_wmv2_idct_c(block);
+    add_pixels_clamped_c(block, dest, line_size);
+}
+static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    j_rev_dct (block);
+    put_pixels_clamped_c(block, dest, line_size);
+}
+static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    j_rev_dct (block);
+    add_pixels_clamped_c(block, dest, line_size);
+}
+static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    j_rev_dct4 (block);
+    put_pixels_clamped4_c(block, dest, line_size);
+}
+static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    j_rev_dct4 (block);
+    add_pixels_clamped4_c(block, dest, line_size);
+}
+static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    j_rev_dct2 (block);
+    put_pixels_clamped2_c(block, dest, line_size);
+}
+static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    j_rev_dct2 (block);
+    add_pixels_clamped2_c(block, dest, line_size);
+}
+static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+    dest[0] = cm[(block[0] + 4)>>3];
+}
+static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+    dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
+}
+static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
+#endif
+/* init static data */
+void dsputil_static_init(void)
+{
+    int i;
+    for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
+    for(i=0;i<MAX_NEG_CROP;i++) {
+        ff_cropTbl[i] = 0;
+        ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
+    }
+    for(i=0;i<512;i++) {
+        ff_squareTbl[i] = (i - 256) * (i - 256);
+    }
+    for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
+}
+int ff_check_alignment(void){
+    static int did_fail=0;
+    DECLARE_ALIGNED_16(int, aligned);
+    if((long)&aligned & 15){
+        if(!did_fail){
+#if HAVE_MMX || HAVE_ALTIVEC
+            av_log(NULL, AV_LOG_ERROR,
+                "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
+                "and may be very slow or crash. This is not a bug in libavcodec,\n"
+                "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
+                "Do not report crashes to FFmpeg developers.\n");
+#endif
+            did_fail=1;
+        }
+        return -1;
+    }
+    return 0;
+}
+void dsputil_init(DSPContext* c)
+{
+    ff_check_alignment();
+    c->vector_fmul = vector_fmul_c;
+    c->vector_fmul_reverse = vector_fmul_reverse_c;
+    c->vector_fmul_add_add = ff_vector_fmul_add_add_c;
+    c->vector_fmul_window = ff_vector_fmul_window_c;
+}
diff --git a/apps/codecs/libatrac/dsputil.h b/apps/codecs/libatrac/dsputil.h
new file mode 100644
index 0000000000..3bb0ff77a5
--- /dev/null
+++ b/apps/codecs/libatrac/dsputil.h
@@ -0,0 +1,898 @@
+/*
+ * DSP utils
+ * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/**
+ * @file libavcodec/dsputil.h
+ * DSP utils.
+ * note, many functions in here may use MMX which trashes the FPU state, it is
+ * absolutely necessary to call emms_c() between dsp & float/double code
+ */
+#ifndef AVCODEC_DSPUTIL_H
+#define AVCODEC_DSPUTIL_H
+#include "libavutil/intreadwrite.h"
+#include "avcodec.h"
+//#define DEBUG
+/* dct code */
+typedef short DCTELEM;
+typedef int DWTELEM;
+typedef short IDWTELEM;
+void fdct_ifast (DCTELEM *data);
+void fdct_ifast248 (DCTELEM *data);
+void ff_jpeg_fdct_islow (DCTELEM *data);
+void ff_fdct248_islow (DCTELEM *data);
+void j_rev_dct (DCTELEM *data);
+void j_rev_dct4 (DCTELEM *data);
+void j_rev_dct2 (DCTELEM *data);
+void j_rev_dct1 (DCTELEM *data);
+void ff_wmv2_idct_c(DCTELEM *data);
+void ff_fdct_mmx(DCTELEM *block);
+void ff_fdct_mmx2(DCTELEM *block);
+void ff_fdct_sse2(DCTELEM *block);
+void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride);
+void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride);
+void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride);
+void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride);
+void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block);
+void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block);
+void ff_h264_idct_add16_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
+void ff_h264_idct_add16intra_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
+void ff_h264_idct8_add4_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
+void ff_h264_idct_add8_c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
+void ff_vector_fmul_add_add_c(float *dst, const float *src0, const float *src1,
+                              const float *src2, int src3, int blocksize, int step);
+void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1,
+                             const float *win, float add_bias, int len);
+void ff_float_to_int16_c(int16_t *dst, const float *src, long len);
+void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, int channels);
+/* encoding scans */
+extern const uint8_t ff_alternate_horizontal_scan[64];
+extern const uint8_t ff_alternate_vertical_scan[64];
+extern const uint8_t ff_zigzag_direct[64];
+extern const uint8_t ff_zigzag248_direct[64];
+/* pixel operations */
+#define MAX_NEG_CROP 1024
+/* temporary */
+extern uint32_t ff_squareTbl[512];
+extern uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP];
+/* VP3 DSP functions */
+void ff_vp3_idct_c(DCTELEM *block/* align 16*/);
+void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
+void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
+void ff_vp3_v_loop_filter_c(uint8_t *src, int stride, int *bounding_values);
+void ff_vp3_h_loop_filter_c(uint8_t *src, int stride, int *bounding_values);
+/* VP6 DSP functions */
+void ff_vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, int stride,
+                           const int16_t *h_weights, const int16_t *v_weights);
+/* 1/2^n downscaling functions from imgconvert.c */
+void ff_img_copy_plane(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
+void ff_shrink22(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
+void ff_shrink44(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
+void ff_shrink88(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
+void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
+              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
+/* minimum alignment rules ;)
+If you notice errors in the align stuff, need more alignment for some ASM code
+for some CPU or need to use a function with less aligned data then send a mail
+to the ffmpeg-devel mailing list, ...
+!warning These alignments might not match reality, (missing attribute((align))
+stuff somewhere possible).
+I (Michael) did not check them, these are just the alignments which I think
+could be reached easily ...
+!future video codecs might need functions with less strict alignment
+*/
+/*
+void get_pixels_c(DCTELEM *block, const uint8_t *pixels, int line_size);
+void diff_pixels_c(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride);
+void put_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size);
+void add_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size);
+void clear_blocks_c(DCTELEM *blocks);
+*/
+/* add and put pixel (decoding) */
+// blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16
+//h for op_pixels_func is limited to {width/2, width} but never larger than 16 and never smaller then 4
+typedef void (*op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int h);
+typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h);
+typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);
+typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y);
+typedef void (*h264_weight_func)(uint8_t *block, int stride, int log2_denom, int weight, int offset);
+typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset);
+#define DEF_OLD_QPEL(name)\
+void ff_put_        ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\
+void ff_put_no_rnd_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\
+void ff_avg_        ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);
+DEF_OLD_QPEL(qpel16_mc11_old_c)
+DEF_OLD_QPEL(qpel16_mc31_old_c)
+DEF_OLD_QPEL(qpel16_mc12_old_c)
+DEF_OLD_QPEL(qpel16_mc32_old_c)
+DEF_OLD_QPEL(qpel16_mc13_old_c)
+DEF_OLD_QPEL(qpel16_mc33_old_c)
+DEF_OLD_QPEL(qpel8_mc11_old_c)
+DEF_OLD_QPEL(qpel8_mc31_old_c)
+DEF_OLD_QPEL(qpel8_mc12_old_c)
+DEF_OLD_QPEL(qpel8_mc32_old_c)
+DEF_OLD_QPEL(qpel8_mc13_old_c)
+DEF_OLD_QPEL(qpel8_mc33_old_c)
+#define CALL_2X_PIXELS(a, b, n)\
+static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    b(block  , pixels  , line_size, h);\
+    b(block+n, pixels+n, line_size, h);\
+}
+/* motion estimation */
+// h is limited to {width/2, width, 2*width} but never larger than 16 and never smaller then 2
+// although currently h<4 is not used as functions with width <8 are neither used nor implemented
+typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/;
+// for snow slices
+typedef struct slice_buffer_s slice_buffer;
+/**
+ * Scantable.
+ */
+typedef struct ScanTable{
+    const uint8_t *scantable;
+    uint8_t permutated[64];
+    uint8_t raster_end[64];
+#if ARCH_PPC
+                /** Used by dct_quantize_altivec to find last-non-zero */
+    DECLARE_ALIGNED(16, uint8_t, inverse[64]);
+#endif
+} ScanTable;
+void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable);
+void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize,
+                         int block_w, int block_h,
+                         int src_x, int src_y, int w, int h);
+/**
+ * DSPContext.
+ */
+typedef struct DSPContext {
+    /* pixel ops : interface with DCT */
+    void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size);
+    void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride);
+    void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
+    void (*put_signed_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
+    void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
+    void (*add_pixels8)(uint8_t *pixels, DCTELEM *block, int line_size);
+    void (*add_pixels4)(uint8_t *pixels, DCTELEM *block, int line_size);
+    int (*sum_abs_dctelem)(DCTELEM *block/*align 16*/);
+    /**
+     * translational global motion compensation.
+     */
+    void (*gmc1)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder);
+    /**
+     * global motion compensation.
+     */
+    void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy,
+                    int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
+    void (*clear_block)(DCTELEM *block/*align 16*/);
+    void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
+    int (*pix_sum)(uint8_t * pix, int line_size);
+    int (*pix_norm1)(uint8_t * pix, int line_size);
+// 16x16 8x8 4x4 2x2 16x8 8x4 4x2 8x16 4x8 2x4
+    me_cmp_func sad[6]; /* identical to pix_absAxA except additional void * */
+    me_cmp_func sse[6];
+    me_cmp_func hadamard8_diff[6];
+    me_cmp_func dct_sad[6];
+    me_cmp_func quant_psnr[6];
+    me_cmp_func bit[6];
+    me_cmp_func rd[6];
+    me_cmp_func vsad[6];
+    me_cmp_func vsse[6];
+    me_cmp_func nsse[6];
+    me_cmp_func w53[6];
+    me_cmp_func w97[6];
+    me_cmp_func dct_max[6];
+    me_cmp_func dct264_sad[6];
+    me_cmp_func me_pre_cmp[6];
+    me_cmp_func me_cmp[6];
+    me_cmp_func me_sub_cmp[6];
+    me_cmp_func mb_cmp[6];
+    me_cmp_func ildct_cmp[6]; //only width 16 used
+    me_cmp_func frame_skip_cmp[6]; //only width 8 used
+    int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2,
+                             int size);
+    /**
+     * Halfpel motion compensation with rounding (a+b+1)>>1.
+     * this is an array[4][4] of motion compensation functions for 4
+     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
+     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
+     * @param block destination where the result is stored
+     * @param pixels source
+     * @param line_size number of bytes in a horizontal line of block
+     * @param h height
+     */
+    op_pixels_func put_pixels_tab[4][4];
+    /**
+     * Halfpel motion compensation with rounding (a+b+1)>>1.
+     * This is an array[4][4] of motion compensation functions for 4
+     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
+     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
+     * @param block destination into which the result is averaged (a+b+1)>>1
+     * @param pixels source
+     * @param line_size number of bytes in a horizontal line of block
+     * @param h height
+     */
+    op_pixels_func avg_pixels_tab[4][4];
+    /**
+     * Halfpel motion compensation with no rounding (a+b)>>1.
+     * this is an array[2][4] of motion compensation functions for 2
+     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
+     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
+     * @param block destination where the result is stored
+     * @param pixels source
+     * @param line_size number of bytes in a horizontal line of block
+     * @param h height
+     */
+    op_pixels_func put_no_rnd_pixels_tab[4][4];
+    /**
+     * Halfpel motion compensation with no rounding (a+b)>>1.
+     * this is an array[2][4] of motion compensation functions for 2
+     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
+     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
+     * @param block destination into which the result is averaged (a+b)>>1
+     * @param pixels source
+     * @param line_size number of bytes in a horizontal line of block
+     * @param h height
+     */
+    op_pixels_func avg_no_rnd_pixels_tab[4][4];
+    void (*put_no_rnd_pixels_l2[2])(uint8_t *block/*align width (8 or 16)*/, const uint8_t *a/*align 1*/, const uint8_t *b/*align 1*/, int line_size, int h);
+    /**
+     * Thirdpel motion compensation with rounding (a+b+1)>>1.
+     * this is an array[12] of motion compensation functions for the 9 thirdpe
+     * positions<br>
+     * *pixels_tab[ xthirdpel + 4*ythirdpel ]
+     * @param block destination where the result is stored
+     * @param pixels source
+     * @param line_size number of bytes in a horizontal line of block
+     * @param h height
+     */
+    tpel_mc_func put_tpel_pixels_tab[11]; //FIXME individual func ptr per width?
+    tpel_mc_func avg_tpel_pixels_tab[11]; //FIXME individual func ptr per width?
+    qpel_mc_func put_qpel_pixels_tab[2][16];
+    qpel_mc_func avg_qpel_pixels_tab[2][16];
+    qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
+    qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16];
+    qpel_mc_func put_mspel_pixels_tab[8];
+    /**
+     * h264 Chroma MC
+     */
+    h264_chroma_mc_func put_h264_chroma_pixels_tab[3];
+    /* This is really one func used in VC-1 decoding */
+    h264_chroma_mc_func put_no_rnd_h264_chroma_pixels_tab[3];
+    h264_chroma_mc_func avg_h264_chroma_pixels_tab[3];
+    qpel_mc_func put_h264_qpel_pixels_tab[4][16];
+    qpel_mc_func avg_h264_qpel_pixels_tab[4][16];
+    qpel_mc_func put_2tap_qpel_pixels_tab[4][16];
+    qpel_mc_func avg_2tap_qpel_pixels_tab[4][16];
+    h264_weight_func weight_h264_pixels_tab[10];
+    h264_biweight_func biweight_h264_pixels_tab[10];
+    /* AVS specific */
+    qpel_mc_func put_cavs_qpel_pixels_tab[2][16];
+    qpel_mc_func avg_cavs_qpel_pixels_tab[2][16];
+    void (*cavs_filter_lv)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
+    void (*cavs_filter_lh)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
+    void (*cavs_filter_cv)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
+    void (*cavs_filter_ch)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
+    void (*cavs_idct8_add)(uint8_t *dst, DCTELEM *block, int stride);
+    me_cmp_func pix_abs[2][4];
+    /* huffyuv specific */
+    void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w);
+    void (*add_bytes_l2)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 16*/, int w);
+    void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w);
+    /**
+     * subtract huffyuv's variant of median prediction
+     * note, this might read from src1[-1], src2[-1]
+     */
+    void (*sub_hfyu_median_prediction)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top);
+    void (*add_hfyu_median_prediction)(uint8_t *dst, uint8_t *top, uint8_t *diff, int w, int *left, int *left_top);
+    /* this might write to dst[w] */
+    void (*add_png_paeth_prediction)(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp);
+    void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w);
+    void (*h264_v_loop_filter_luma)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0);
+    void (*h264_h_loop_filter_luma)(uint8_t *pix/*align 4 */, int stride, int alpha, int beta, int8_t *tc0);
+    /* v/h_loop_filter_luma_intra: align 16 */
+    void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta);
+    void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta);
+    void (*h264_v_loop_filter_chroma)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta, int8_t *tc0);
+    void (*h264_h_loop_filter_chroma)(uint8_t *pix/*align 4*/, int stride, int alpha, int beta, int8_t *tc0);
+    void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta);
+    void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta);
+    // h264_loop_filter_strength: simd only. the C version is inlined in h264.c
+    void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2],
+                                      int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field);
+    void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale);
+    void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale);
+    void (*h261_loop_filter)(uint8_t *src, int stride);
+    void (*x8_v_loop_filter)(uint8_t *src, int stride, int qscale);
+    void (*x8_h_loop_filter)(uint8_t *src, int stride, int qscale);
+    void (*vp3_v_loop_filter)(uint8_t *src, int stride, int *bounding_values);
+    void (*vp3_h_loop_filter)(uint8_t *src, int stride, int *bounding_values);
+    void (*vp6_filter_diag4)(uint8_t *dst, uint8_t *src, int stride,
+                             const int16_t *h_weights,const int16_t *v_weights);
+    /* assume len is a multiple of 4, and arrays are 16-byte aligned */
+    void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize);
+    void (*ac3_downmix)(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len);
+    /* no alignment needed */
+    void (*flac_compute_autocorr)(const int32_t *data, int len, int lag, double *autoc);
+    /* assume len is a multiple of 8, and arrays are 16-byte aligned */
+    void (*vector_fmul)(float *dst, const float *src, int len);
+    void (*vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len);
+    /* assume len is a multiple of 8, and src arrays are 16-byte aligned */
+    void (*vector_fmul_add_add)(float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step);
+    /* assume len is a multiple of 4, and arrays are 16-byte aligned */
+    void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len);
+    /* assume len is a multiple of 8, and arrays are 16-byte aligned */
+    void (*int32_to_float_fmul_scalar)(float *dst, const int *src, float mul, int len);
+    /* C version: convert floats from the range [384.0,386.0] to ints in [-32768,32767]
+     * simd versions: convert floats from [-32768.0,32767.0] without rescaling and arrays are 16byte aligned */
+    void (*float_to_int16)(int16_t *dst, const float *src, long len);
+    void (*float_to_int16_interleave)(int16_t *dst, const float **src, long len, int channels);
+    /* (I)DCT */
+    void (*fdct)(DCTELEM *block/* align 16*/);
+    void (*fdct248)(DCTELEM *block/* align 16*/);
+    /* IDCT really*/
+    void (*idct)(DCTELEM *block/* align 16*/);
+    /**
+     * block -> idct -> clip to unsigned 8 bit -> dest.
+     * (-1392, 0, 0, ...) -> idct -> (-174, -174, ...) -> put -> (0, 0, ...)
+     * @param line_size size in bytes of a horizontal line of dest
+     */
+    void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
+    /**
+     * block -> idct -> add dest -> clip to unsigned 8 bit -> dest.
+     * @param line_size size in bytes of a horizontal line of dest
+     */
+    void (*idct_add)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
+    /**
+     * idct input permutation.
+     * several optimized IDCTs need a permutated input (relative to the normal order of the reference
+     * IDCT)
+     * this permutation must be performed before the idct_put/add, note, normally this can be merged
+     * with the zigzag/alternate scan<br>
+     * an example to avoid confusion:
+     * - (->decode coeffs -> zigzag reorder -> dequant -> reference idct ->...)
+     * - (x -> referece dct -> reference idct -> x)
+     * - (x -> referece dct -> simple_mmx_perm = idct_permutation -> simple_idct_mmx -> x)
+     * - (->decode coeffs -> zigzag reorder -> simple_mmx_perm -> dequant -> simple_idct_mmx ->...)
+     */
+    uint8_t idct_permutation[64];
+    int idct_permutation_type;
+#define FF_NO_IDCT_PERM 1
+#define FF_LIBMPEG2_IDCT_PERM 2
+#define FF_SIMPLE_IDCT_PERM 3
+#define FF_TRANSPOSE_IDCT_PERM 4
+#define FF_PARTTRANS_IDCT_PERM 5
+#define FF_SSE2_IDCT_PERM 6
+    int (*try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale);
+    void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale);
+#define BASIS_SHIFT 16
+#define RECON_SHIFT 6
+    void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w);
+#define EDGE_WIDTH 16
+    /* h264 functions */
+    /* NOTE!!! if you implement any of h264_idct8_add, h264_idct8_add4 then you must implement all of them
+       NOTE!!! if you implement any of h264_idct_add, h264_idct_add16, h264_idct_add16intra, h264_idct_add8 then you must implement all of them
+        The reason for above, is that no 2 out of one list may use a different permutation.
+    */
+    void (*h264_idct_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride);
+    void (*h264_idct8_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride);
+    void (*h264_idct_dc_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride);
+    void (*h264_idct8_dc_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride);
+    void (*h264_dct)(DCTELEM block[4][4]);
+    void (*h264_idct_add16)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
+    void (*h264_idct8_add4)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
+    void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
+    void (*h264_idct_add16intra)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
+    /* snow wavelet */
+    void (*vertical_compose97i)(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
+    void (*horizontal_compose97i)(IDWTELEM *b, int width);
+    void (*inner_add_yblock)(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
+    void (*prefetch)(void *mem, int stride, int h);
+    void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
+    /* vc1 functions */
+    void (*vc1_inv_trans_8x8)(DCTELEM *b);
+    void (*vc1_inv_trans_8x4)(uint8_t *dest, int line_size, DCTELEM *block);
+    void (*vc1_inv_trans_4x8)(uint8_t *dest, int line_size, DCTELEM *block);
+    void (*vc1_inv_trans_4x4)(uint8_t *dest, int line_size, DCTELEM *block);
+    void (*vc1_v_overlap)(uint8_t* src, int stride);
+    void (*vc1_h_overlap)(uint8_t* src, int stride);
+    /* put 8x8 block with bicubic interpolation and quarterpel precision
+     * last argument is actually round value instead of height
+     */
+    op_pixels_func put_vc1_mspel_pixels_tab[16];
+    /* intrax8 functions */
+    void (*x8_spatial_compensation[12])(uint8_t *src , uint8_t *dst, int linesize);
+    void (*x8_setup_spatial_compensation)(uint8_t *src, uint8_t *dst, int linesize,
+           int * range, int * sum,  int edges);
+    /* ape functions */
+    /**
+     * Add contents of the second vector to the first one.
+     * @param len length of vectors, should be multiple of 16
+     */
+    void (*add_int16)(int16_t *v1/*align 16*/, int16_t *v2, int len);
+    /**
+     * Add contents of the second vector to the first one.
+     * @param len length of vectors, should be multiple of 16
+     */
+    void (*sub_int16)(int16_t *v1/*align 16*/, int16_t *v2, int len);
+    /**
+     * Calculate scalar product of two vectors.
+     * @param len length of vectors, should be multiple of 16
+     * @param shift number of bits to discard from product
+     */
+    int32_t (*scalarproduct_int16)(int16_t *v1, int16_t *v2/*align 16*/, int len, int shift);
+    /* rv30 functions */
+    qpel_mc_func put_rv30_tpel_pixels_tab[4][16];
+    qpel_mc_func avg_rv30_tpel_pixels_tab[4][16];
+    /* rv40 functions */
+    qpel_mc_func put_rv40_qpel_pixels_tab[4][16];
+    qpel_mc_func avg_rv40_qpel_pixels_tab[4][16];
+    h264_chroma_mc_func put_rv40_chroma_pixels_tab[3];
+    h264_chroma_mc_func avg_rv40_chroma_pixels_tab[3];
+} DSPContext;
+void dsputil_static_init(void);
+void dsputil_init(DSPContext* p);
+int ff_check_alignment(void);
+/**
+ * permute block according to permuatation.
+ * @param last last non zero element in scantable order
+ */
+void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last);
+void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type);
+#define         BYTE_VEC32(c)   ((c)*0x01010101UL)
+static inline uint32_t rnd_avg32(uint32_t a, uint32_t b)
+{
+    return (a | b) - (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1);
+}
+static inline uint32_t no_rnd_avg32(uint32_t a, uint32_t b)
+{
+    return (a & b) + (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1);
+}
+static inline int get_penalty_factor(int lambda, int lambda2, int type){
+    switch(type&0xFF){
+    default:
+    case FF_CMP_SAD:
+        return lambda>>FF_LAMBDA_SHIFT;
+    case FF_CMP_DCT:
+        return (3*lambda)>>(FF_LAMBDA_SHIFT+1);
+    case FF_CMP_W53:
+        return (4*lambda)>>(FF_LAMBDA_SHIFT);
+    case FF_CMP_W97:
+        return (2*lambda)>>(FF_LAMBDA_SHIFT);
+    case FF_CMP_SATD:
+    case FF_CMP_DCT264:
+        return (2*lambda)>>FF_LAMBDA_SHIFT;
+    case FF_CMP_RD:
+    case FF_CMP_PSNR:
+    case FF_CMP_SSE:
+    case FF_CMP_NSSE:
+        return lambda2>>FF_LAMBDA_SHIFT;
+    case FF_CMP_BIT:
+        return 1;
+    }
+}
+/**
+ * Empty mmx state.
+ * this must be called between any dsp function and float/double code.
+ * for example sin(); dsp->idct_put(); emms_c(); cos()
+ */
+#define emms_c()
+/* should be defined by architectures supporting
+   one or more MultiMedia extension */
+int mm_support(void);
+void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx);
+void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx);
+void dsputil_init_bfin(DSPContext* c, AVCodecContext *avctx);
+void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx);
+void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx);
+void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx);
+void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx);
+void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx);
+void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx);
+#define DECLARE_ALIGNED_16(t, v) DECLARE_ALIGNED(16, t, v)
+#if HAVE_MMX
+#undef emms_c
+extern int mm_flags;
+void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
+void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
+void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
+static inline void emms(void)
+{
+    __asm__ volatile ("emms;":::"memory");
+}
+#define emms_c() \
+{\
+    if (mm_flags & FF_MM_MMX)\
+        emms();\
+}
+void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx);
+#elif ARCH_ARM
+extern int mm_flags;
+#if HAVE_NEON
+#   define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(16, t, v)
+#   define STRIDE_ALIGN 16
+#endif
+#elif ARCH_PPC
+extern int mm_flags;
+#define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(16, t, v)
+#define STRIDE_ALIGN 16
+#elif HAVE_MMI
+#define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(16, t, v)
+#define STRIDE_ALIGN 16
+#else
+#define mm_flags 0
+#define mm_support() 0
+#endif
+#ifndef DECLARE_ALIGNED_8
+#   define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(8, t, v)
+#endif
+#ifndef STRIDE_ALIGN
+#   define STRIDE_ALIGN 8
+#endif
+/* PSNR */
+void get_psnr(uint8_t *orig_image[3], uint8_t *coded_image[3],
+              int orig_linesize[3], int coded_linesize,
+              AVCodecContext *avctx);
+/* FFT computation */
+/* NOTE: soon integer code will be added, so you must use the
+   FFTSample type */
+typedef float FFTSample;
+struct MDCTContext;
+typedef struct FFTComplex {
+    FFTSample re, im;
+} FFTComplex;
+typedef struct FFTContext {
+    int nbits;
+    int inverse;
+    uint16_t *revtab;
+    FFTComplex *exptab;
+    FFTComplex *exptab1; /* only used by SSE code */
+    FFTComplex *tmp_buf;
+    void (*fft_permute)(struct FFTContext *s, FFTComplex *z);
+    void (*fft_calc)(struct FFTContext *s, FFTComplex *z);
+    void (*imdct_calc)(struct MDCTContext *s, FFTSample *output, const FFTSample *input);
+    void (*imdct_half)(struct MDCTContext *s, FFTSample *output, const FFTSample *input);
+} FFTContext;
+extern FFTSample* ff_cos_tabs[13];
+/**
+ * Sets up a complex FFT.
+ * @param nbits           log2 of the length of the input array
+ * @param inverse         if 0 perform the forward transform, if 1 perform the inverse
+ */
+int ff_fft_init(FFTContext *s, int nbits, int inverse);
+void ff_fft_permute_c(FFTContext *s, FFTComplex *z);
+void ff_fft_permute_sse(FFTContext *s, FFTComplex *z);
+void ff_fft_calc_c(FFTContext *s, FFTComplex *z);
+void ff_fft_calc_sse(FFTContext *s, FFTComplex *z);
+void ff_fft_calc_3dn(FFTContext *s, FFTComplex *z);
+void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z);
+void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z);
+/**
+ * Do the permutation needed BEFORE calling ff_fft_calc().
+ */
+static inline void ff_fft_permute(FFTContext *s, FFTComplex *z)
+{
+    s->fft_permute(s, z);
+}
+/**
+ * Do a complex FFT with the parameters defined in ff_fft_init(). The
+ * input data must be permuted before. No 1.0/sqrt(n) normalization is done.
+ */
+static inline void ff_fft_calc(FFTContext *s, FFTComplex *z)
+{
+    s->fft_calc(s, z);
+}
+void ff_fft_end(FFTContext *s);
+/* MDCT computation */
+typedef struct MDCTContext {
+    int n;  /* size of MDCT (i.e. number of input data * 2) */
+    int nbits; /* n = 2^nbits */
+    /* pre/post rotation tables */
+    FFTSample *tcos;
+    FFTSample *tsin;
+    FFTContext fft;
+} MDCTContext;
+static inline void ff_imdct_calc(MDCTContext *s, FFTSample *output, const FFTSample *input)
+{
+    s->fft.imdct_calc(s, output, input);
+}
+static inline void ff_imdct_half(MDCTContext *s, FFTSample *output, const FFTSample *input)
+{
+    s->fft.imdct_half(s, output, input);
+}
+/**
+ * Generate a Kaiser-Bessel Derived Window.
+ * @param   window  pointer to half window
+ * @param   alpha   determines window shape
+ * @param   n       size of half window
+ */
+void ff_kbd_window_init(float *window, float alpha, int n);
+/**
+ * Generate a sine window.
+ * @param   window  pointer to half window
+ * @param   n       size of half window
+ */
+void ff_sine_window_init(float *window, int n);
+extern float ff_sine_128 [ 128];
+extern float ff_sine_256 [ 256];
+extern float ff_sine_512 [ 512];
+extern float ff_sine_1024[1024];
+extern float ff_sine_2048[2048];
+extern float ff_sine_4096[4096];
+extern float *ff_sine_windows[6];
+int ff_mdct_init(MDCTContext *s, int nbits, int inverse);
+void ff_imdct_calc_c(MDCTContext *s, FFTSample *output, const FFTSample *input);
+void ff_imdct_half_c(MDCTContext *s, FFTSample *output, const FFTSample *input);
+void ff_imdct_calc_3dn(MDCTContext *s, FFTSample *output, const FFTSample *input);
+void ff_imdct_half_3dn(MDCTContext *s, FFTSample *output, const FFTSample *input);
+void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *input);
+void ff_imdct_half_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *input);
+void ff_imdct_calc_sse(MDCTContext *s, FFTSample *output, const FFTSample *input);
+void ff_imdct_half_sse(MDCTContext *s, FFTSample *output, const FFTSample *input);
+void ff_mdct_calc(MDCTContext *s, FFTSample *out, const FFTSample *input);
+void ff_mdct_end(MDCTContext *s);
+/* Real Discrete Fourier Transform */
+enum RDFTransformType {
+    RDFT,
+    IRDFT,
+    RIDFT,
+    IRIDFT,
+};
+typedef struct {
+    int nbits;
+    int inverse;
+    int sign_convention;
+    /* pre/post rotation tables */
+    FFTSample *tcos;
+    FFTSample *tsin;
+    FFTContext fft;
+} RDFTContext;
+/**
+ * Sets up a real FFT.
+ * @param nbits           log2 of the length of the input array
+ * @param trans           the type of transform
+ */
+int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans);
+void ff_rdft_calc(RDFTContext *s, FFTSample *data);
+void ff_rdft_end(RDFTContext *s);
+#define WRAPPER8_16(name8, name16)\
+static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
+    return name8(s, dst           , src           , stride, h)\
+          +name8(s, dst+8         , src+8         , stride, h);\
+}
+#define WRAPPER8_16_SQ(name8, name16)\
+static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
+    int score=0;\
+    score +=name8(s, dst           , src           , stride, 8);\
+    score +=name8(s, dst+8         , src+8         , stride, 8);\
+    if(h==16){\
+        dst += 8*stride;\
+        src += 8*stride;\
+        score +=name8(s, dst           , src           , stride, 8);\
+        score +=name8(s, dst+8         , src+8         , stride, 8);\
+    }\
+    return score;\
+}
+static inline void copy_block2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
+{
+    int i;
+    for(i=0; i<h; i++)
+    {
+        AV_WN16(dst   , AV_RN16(src   ));
+        dst+=dstStride;
+        src+=srcStride;
+    }
+}
+static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
+{
+    int i;
+    for(i=0; i<h; i++)
+    {
+        AV_WN32(dst   , AV_RN32(src   ));
+        dst+=dstStride;
+        src+=srcStride;
+    }
+}
+static inline void copy_block8(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
+{
+    int i;
+    for(i=0; i<h; i++)
+    {
+        AV_WN32(dst   , AV_RN32(src   ));
+        AV_WN32(dst+4 , AV_RN32(src+4 ));
+        dst+=dstStride;
+        src+=srcStride;
+    }
+}
+static inline void copy_block9(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
+{
+    int i;
+    for(i=0; i<h; i++)
+    {
+        AV_WN32(dst   , AV_RN32(src   ));
+        AV_WN32(dst+4 , AV_RN32(src+4 ));
+        dst[8]= src[8];
+        dst+=dstStride;
+        src+=srcStride;
+    }
+}
+static inline void copy_block16(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
+{
+    int i;
+    for(i=0; i<h; i++)
+    {
+        AV_WN32(dst   , AV_RN32(src   ));
+        AV_WN32(dst+4 , AV_RN32(src+4 ));
+        AV_WN32(dst+8 , AV_RN32(src+8 ));
+        AV_WN32(dst+12, AV_RN32(src+12));
+        dst+=dstStride;
+        src+=srcStride;
+    }
+}
+static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
+{
+    int i;
+    for(i=0; i<h; i++)
+    {
+        AV_WN32(dst   , AV_RN32(src   ));
+        AV_WN32(dst+4 , AV_RN32(src+4 ));
+        AV_WN32(dst+8 , AV_RN32(src+8 ));
+        AV_WN32(dst+12, AV_RN32(src+12));
+        dst[16]= src[16];
+        dst+=dstStride;
+        src+=srcStride;
+    }
+}
+#endif /* AVCODEC_DSPUTIL_H */
diff --git a/apps/codecs/libatrac/ffmpeg_config.h b/apps/codecs/libatrac/ffmpeg_config.h
new file mode 100644
index 0000000000..707e14df52
--- /dev/null
+++ b/apps/codecs/libatrac/ffmpeg_config.h
@@ -0,0 +1,14 @@
+/* Automatically generated by configure - do not modify */
+#ifndef _FFMPEG_CONFIG_H
+#define _FFMPEG_CONFIG_H
+// CHECK THIS : #include "codecs.h"
+#ifdef CPU_ARM
+#define CONFIG_ALIGN 1
+#endif
+#ifdef ROCKBOX_BIG_ENDIAN
+#define WORDS_BIGENDIAN
+#endif
+#endif
diff --git a/apps/codecs/libatrac/fft.c b/apps/codecs/libatrac/fft.c
new file mode 100644
index 0000000000..a3f1151472
--- /dev/null
+++ b/apps/codecs/libatrac/fft.c
@@ -0,0 +1,374 @@
+/*
+ * FFT/IFFT transforms
+ * Copyright (c) 2008 Loren Merritt
+ * Copyright (c) 2002 Fabrice Bellard
+ * Partly based on libdjbfft by D. J. Bernstein
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/**
+ * @file libavcodec/fft.c
+ * FFT/IFFT transforms.
+ */
+#include "dsputil.h"
+/* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */
+DECLARE_ALIGNED_16(FFTSample, ff_cos_16[8]);
+DECLARE_ALIGNED_16(FFTSample, ff_cos_32[16]);
+DECLARE_ALIGNED_16(FFTSample, ff_cos_64[32]);
+DECLARE_ALIGNED_16(FFTSample, ff_cos_128[64]);
+DECLARE_ALIGNED_16(FFTSample, ff_cos_256[128]);
+DECLARE_ALIGNED_16(FFTSample, ff_cos_512[256]);
+DECLARE_ALIGNED_16(FFTSample, ff_cos_1024[512]);
+DECLARE_ALIGNED_16(FFTSample, ff_cos_2048[1024]);
+DECLARE_ALIGNED_16(FFTSample, ff_cos_4096[2048]);
+DECLARE_ALIGNED_16(FFTSample, ff_cos_8192[4096]);
+DECLARE_ALIGNED_16(FFTSample, ff_cos_16384[8192]);
+DECLARE_ALIGNED_16(FFTSample, ff_cos_32768[16384]);
+DECLARE_ALIGNED_16(FFTSample, ff_cos_65536[32768]);
+FFTSample *ff_cos_tabs[] = {
+    ff_cos_16, ff_cos_32, ff_cos_64, ff_cos_128, ff_cos_256, ff_cos_512, ff_cos_1024,
+    ff_cos_2048, ff_cos_4096, ff_cos_8192, ff_cos_16384, ff_cos_32768, ff_cos_65536,
+};
+static int split_radix_permutation(int i, int n, int inverse)
+{
+    int m;
+    if(n <= 2) return i&1;
+    m = n >> 1;
+    if(!(i&m))            return split_radix_permutation(i, m, inverse)*2;
+    m >>= 1;
+    if(inverse == !(i&m)) return split_radix_permutation(i, m, inverse)*4 + 1;
+    else                  return split_radix_permutation(i, m, inverse)*4 - 1;
+}
+av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
+{
+    int i, j, m, n;
+    float alpha, c1, s1, s2;
+    int split_radix = 1;
+    int av_unused has_vectors;
+    if (nbits < 2 || nbits > 16)
+        goto fail;
+    s->nbits = nbits;
+    n = 1 << nbits;
+    s->tmp_buf = NULL;
+    s->exptab  = av_malloc((n / 2) * sizeof(FFTComplex));
+    if (!s->exptab)
+        goto fail;
+    s->revtab = av_malloc(n * sizeof(uint16_t));
+    if (!s->revtab)
+        goto fail;
+    s->inverse = inverse;
+    s2 = inverse ? 1.0 : -1.0;
+    s->fft_permute = ff_fft_permute_c;
+    s->fft_calc    = ff_fft_calc_c;
+    s->imdct_calc  = ff_imdct_calc_c;
+    s->imdct_half  = ff_imdct_half_c;
+    s->exptab1     = NULL;
+#if HAVE_MMX && HAVE_YASM
+    has_vectors = mm_support();
+    if (has_vectors & FF_MM_SSE && HAVE_SSE) {
+        /* SSE for P3/P4/K8 */
+        s->imdct_calc  = ff_imdct_calc_sse;
+        s->imdct_half  = ff_imdct_half_sse;
+        s->fft_permute = ff_fft_permute_sse;
+        s->fft_calc    = ff_fft_calc_sse;
+    } else if (has_vectors & FF_MM_3DNOWEXT && HAVE_AMD3DNOWEXT) {
+        /* 3DNowEx for K7 */
+        s->imdct_calc = ff_imdct_calc_3dn2;
+        s->imdct_half = ff_imdct_half_3dn2;
+        s->fft_calc   = ff_fft_calc_3dn2;
+    } else if (has_vectors & FF_MM_3DNOW && HAVE_AMD3DNOW) {
+        /* 3DNow! for K6-2/3 */
+        s->imdct_calc = ff_imdct_calc_3dn;
+        s->imdct_half = ff_imdct_half_3dn;
+        s->fft_calc   = ff_fft_calc_3dn;
+    }
+#elif HAVE_ALTIVEC
+    has_vectors = mm_support();
+    if (has_vectors & FF_MM_ALTIVEC) {
+        s->fft_calc = ff_fft_calc_altivec;
+        split_radix = 0;
+    }
+#endif
+    if (split_radix) {
+        for(j=4; j<=nbits; j++) {
+            int m = 1<<j;
+            double freq = 2*M_PI/m;
+            FFTSample *tab = ff_cos_tabs[j-4];
+            for(i=0; i<=m/4; i++)
+                tab[i] = cos(i*freq);
+            for(i=1; i<m/4; i++)
+                tab[m/2-i] = tab[i];
+        }
+        for(i=0; i<n; i++)
+            s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = i;
+        s->tmp_buf = av_malloc(n * sizeof(FFTComplex));
+    } else {
+        int np, nblocks, np2, l;
+        FFTComplex *q;
+        for(i=0; i<(n/2); i++) {
+            alpha = 2 * M_PI * (float)i / (float)n;
+            c1 = cos(alpha);
+            s1 = sin(alpha) * s2;
+            s->exptab[i].re = c1;
+            s->exptab[i].im = s1;
+        }
+        np = 1 << nbits;
+        nblocks = np >> 3;
+        np2 = np >> 1;
+        s->exptab1 = av_malloc(np * 2 * sizeof(FFTComplex));
+        if (!s->exptab1)
+            goto fail;
+        q = s->exptab1;
+        do {
+            for(l = 0; l < np2; l += 2 * nblocks) {
+                *q++ = s->exptab[l];
+                *q++ = s->exptab[l + nblocks];
+                q->re = -s->exptab[l].im;
+                q->im = s->exptab[l].re;
+                q++;
+                q->re = -s->exptab[l + nblocks].im;
+                q->im = s->exptab[l + nblocks].re;
+                q++;
+            }
+            nblocks = nblocks >> 1;
+        } while (nblocks != 0);
+        av_freep(&s->exptab);
+        /* compute bit reverse table */
+        for(i=0;i<n;i++) {
+            m=0;
+            for(j=0;j<nbits;j++) {
+                m |= ((i >> j) & 1) << (nbits-j-1);
+            }
+            s->revtab[i]=m;
+        }
+    }
+    return 0;
+ fail:
+    av_freep(&s->revtab);
+    av_freep(&s->exptab);
+    av_freep(&s->exptab1);
+    av_freep(&s->tmp_buf);
+    return -1;
+}
+void ff_fft_permute_c(FFTContext *s, FFTComplex *z)
+{
+    int j, k, np;
+    FFTComplex tmp;
+    const uint16_t *revtab = s->revtab;
+    np = 1 << s->nbits;
+    if (s->tmp_buf) {
+        /* TODO: handle split-radix permute in a more optimal way, probably in-place */
+        for(j=0;j<np;j++) s->tmp_buf[revtab[j]] = z[j];
+        memcpy(z, s->tmp_buf, np * sizeof(FFTComplex));
+        return;
+    }
+    /* reverse */
+    for(j=0;j<np;j++) {
+        k = revtab[j];
+        if (k < j) {
+            tmp = z[k];
+            z[k] = z[j];
+            z[j] = tmp;
+        }
+    }
+}
+av_cold void ff_fft_end(FFTContext *s)
+{
+    av_freep(&s->revtab);
+    av_freep(&s->exptab);
+    av_freep(&s->exptab1);
+    av_freep(&s->tmp_buf);
+}
+#define sqrthalf (float)M_SQRT1_2
+#define BF(x,y,a,b) {\
+    x = a - b;\
+    y = a + b;\
+}
+#define BUTTERFLIES(a0,a1,a2,a3) {\
+    BF(t3, t5, t5, t1);\
+    BF(a2.re, a0.re, a0.re, t5);\
+    BF(a3.im, a1.im, a1.im, t3);\
+    BF(t4, t6, t2, t6);\
+    BF(a3.re, a1.re, a1.re, t4);\
+    BF(a2.im, a0.im, a0.im, t6);\
+}
+// force loading all the inputs before storing any.
+// this is slightly slower for small data, but avoids store->load aliasing
+// for addresses separated by large powers of 2.
+#define BUTTERFLIES_BIG(a0,a1,a2,a3) {\
+    FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\
+    BF(t3, t5, t5, t1);\
+    BF(a2.re, a0.re, r0, t5);\
+    BF(a3.im, a1.im, i1, t3);\
+    BF(t4, t6, t2, t6);\
+    BF(a3.re, a1.re, r1, t4);\
+    BF(a2.im, a0.im, i0, t6);\
+}
+#define TRANSFORM(a0,a1,a2,a3,wre,wim) {\
+    t1 = a2.re * wre + a2.im * wim;\
+    t2 = a2.im * wre - a2.re * wim;\
+    t5 = a3.re * wre - a3.im * wim;\
+    t6 = a3.im * wre + a3.re * wim;\
+    BUTTERFLIES(a0,a1,a2,a3)\
+}
+#define TRANSFORM_ZERO(a0,a1,a2,a3) {\
+    t1 = a2.re;\
+    t2 = a2.im;\
+    t5 = a3.re;\
+    t6 = a3.im;\
+    BUTTERFLIES(a0,a1,a2,a3)\
+}
+/* z[0...8n-1], w[1...2n-1] */
+#define PASS(name)\
+static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\
+{\
+    FFTSample t1, t2, t3, t4, t5, t6;\
+    int o1 = 2*n;\
+    int o2 = 4*n;\
+    int o3 = 6*n;\
+    const FFTSample *wim = wre+o1;\
+    n--;\
+\
+    TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\
+    TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
+    do {\
+        z += 2;\
+        wre += 2;\
+        wim -= 2;\
+        TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\
+        TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
+    } while(--n);\
+}
+PASS(pass)
+#undef BUTTERFLIES
+#define BUTTERFLIES BUTTERFLIES_BIG
+PASS(pass_big)
+#define DECL_FFT(n,n2,n4)\
+static void fft##n(FFTComplex *z)\
+{\
+    fft##n2(z);\
+    fft##n4(z+n4*2);\
+    fft##n4(z+n4*3);\
+    pass(z,ff_cos_##n,n4/2);\
+}
+static void fft4(FFTComplex *z)
+{
+    FFTSample t1, t2, t3, t4, t5, t6, t7, t8;
+    BF(t3, t1, z[0].re, z[1].re);
+    BF(t8, t6, z[3].re, z[2].re);
+    BF(z[2].re, z[0].re, t1, t6);
+    BF(t4, t2, z[0].im, z[1].im);
+    BF(t7, t5, z[2].im, z[3].im);
+    BF(z[3].im, z[1].im, t4, t8);
+    BF(z[3].re, z[1].re, t3, t7);
+    BF(z[2].im, z[0].im, t2, t5);
+}
+static void fft8(FFTComplex *z)
+{
+    FFTSample t1, t2, t3, t4, t5, t6, t7, t8;
+    fft4(z);
+    BF(t1, z[5].re, z[4].re, -z[5].re);
+    BF(t2, z[5].im, z[4].im, -z[5].im);
+    BF(t3, z[7].re, z[6].re, -z[7].re);
+    BF(t4, z[7].im, z[6].im, -z[7].im);
+    BF(t8, t1, t3, t1);
+    BF(t7, t2, t2, t4);
+    BF(z[4].re, z[0].re, z[0].re, t1);
+    BF(z[4].im, z[0].im, z[0].im, t2);
+    BF(z[6].re, z[2].re, z[2].re, t7);
+    BF(z[6].im, z[2].im, z[2].im, t8);
+    TRANSFORM(z[1],z[3],z[5],z[7],sqrthalf,sqrthalf);
+}
+#if !CONFIG_SMALL
+static void fft16(FFTComplex *z)
+{
+    FFTSample t1, t2, t3, t4, t5, t6;
+    fft8(z);
+    fft4(z+8);
+    fft4(z+12);
+    TRANSFORM_ZERO(z[0],z[4],z[8],z[12]);
+    TRANSFORM(z[2],z[6],z[10],z[14],sqrthalf,sqrthalf);
+    TRANSFORM(z[1],z[5],z[9],z[13],ff_cos_16[1],ff_cos_16[3]);
+    TRANSFORM(z[3],z[7],z[11],z[15],ff_cos_16[3],ff_cos_16[1]);
+}
+#else
+DECL_FFT(16,8,4)
+#endif
+DECL_FFT(32,16,8)
+DECL_FFT(64,32,16)
+DECL_FFT(128,64,32)
+DECL_FFT(256,128,64)
+DECL_FFT(512,256,128)
+#if !CONFIG_SMALL
+#define pass pass_big
+#endif
+DECL_FFT(1024,512,256)
+DECL_FFT(2048,1024,512)
+DECL_FFT(4096,2048,1024)
+DECL_FFT(8192,4096,2048)
+DECL_FFT(16384,8192,4096)
+DECL_FFT(32768,16384,8192)
+DECL_FFT(65536,32768,16384)
+static void (*fft_dispatch[])(FFTComplex*) = {
+    fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024,
+    fft2048, fft4096, fft8192, fft16384, fft32768, fft65536,
+};
+void ff_fft_calc_c(FFTContext *s, FFTComplex *z)
+{
+    fft_dispatch[s->nbits-2](z);
+}
diff --git a/apps/codecs/libatrac/libavutil/avutil.h b/apps/codecs/libatrac/libavutil/avutil.h
new file mode 100644
index 0000000000..c07e44d660
--- /dev/null
+++ b/apps/codecs/libatrac/libavutil/avutil.h
@@ -0,0 +1,63 @@
+/*
+ * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef AVUTIL_AVUTIL_H
+#define AVUTIL_AVUTIL_H
+/**
+ * @file libavutil/avutil.h
+ * external API header
+ */
+#define AV_STRINGIFY(s)         AV_TOSTRING(s)
+#define AV_TOSTRING(s) #s
+#define AV_VERSION_INT(a, b, c) (a<<16 | b<<8 | c)
+#define AV_VERSION_DOT(a, b, c) a ##.## b ##.## c
+#define AV_VERSION(a, b, c) AV_VERSION_DOT(a, b, c)
+#define LIBAVUTIL_VERSION_MAJOR 50
+#define LIBAVUTIL_VERSION_MINOR  0
+#define LIBAVUTIL_VERSION_MICRO  0
+#define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
+                                               LIBAVUTIL_VERSION_MINOR, \
+                                               LIBAVUTIL_VERSION_MICRO)
+#define LIBAVUTIL_VERSION       AV_VERSION(LIBAVUTIL_VERSION_MAJOR,     \
+                                           LIBAVUTIL_VERSION_MINOR,     \
+                                           LIBAVUTIL_VERSION_MICRO)
+#define LIBAVUTIL_BUILD         LIBAVUTIL_VERSION_INT
+#define LIBAVUTIL_IDENT         "Lavu" AV_STRINGIFY(LIBAVUTIL_VERSION)
+/**
+ * Returns the LIBAVUTIL_VERSION_INT constant.
+ */
+unsigned avutil_version(void);
+#include "common.h"
+//#include "mathematics.h"
+//#include "rational.h"
+//#include "intfloat_readwrite.h"
+#include "log.h"
+//#include "pixfmt.h"
+#endif /* AVUTIL_AVUTIL_H */
diff --git a/apps/codecs/libatrac/libavutil/bswap.h b/apps/codecs/libatrac/libavutil/bswap.h
new file mode 100644
index 0000000000..9175cb24a5
--- /dev/null
+++ b/apps/codecs/libatrac/libavutil/bswap.h
@@ -0,0 +1,99 @@
+/*
+ * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/**
+ * @file libavutil/bswap.h
+ * byte swapping routines
+ */
+#ifndef AVUTIL_BSWAP_H
+#define AVUTIL_BSWAP_H
+#include <stdint.h>
+//#include "ffmpeg_config.h"
+#include "common.h"
+#if   ARCH_ARM
+#   include "arm/bswap.h"
+#elif ARCH_BFIN
+#   include "bfin/bswap.h"
+#elif ARCH_SH4
+#   include "sh4/bswap.h"
+#elif ARCH_X86
+#   include "x86/bswap.h"
+#endif
+#ifndef bswap_16
+static av_always_inline av_const uint16_t bswap_16(uint16_t x)
+{
+    x= (x>>8) | (x<<8);
+    return x;
+}
+#endif
+#ifndef bswap_32
+static av_always_inline av_const uint32_t bswap_32(uint32_t x)
+{
+    x= ((x<<8)&0xFF00FF00) | ((x>>8)&0x00FF00FF);
+    x= (x>>16) | (x<<16);
+    return x;
+}
+#endif
+#ifndef bswap_64
+static inline uint64_t av_const bswap_64(uint64_t x)
+{
+#if 0
+    x= ((x<< 8)&0xFF00FF00FF00FF00ULL) | ((x>> 8)&0x00FF00FF00FF00FFULL);
+    x= ((x<<16)&0xFFFF0000FFFF0000ULL) | ((x>>16)&0x0000FFFF0000FFFFULL);
+    return (x>>32) | (x<<32);
+#else
+    union {
+        uint64_t ll;
+        uint32_t l[2];
+    } w, r;
+    w.ll = x;
+    r.l[0] = bswap_32 (w.l[1]);
+    r.l[1] = bswap_32 (w.l[0]);
+    return r.ll;
+#endif
+}
+#endif
+// be2me ... big-endian to machine-endian
+// le2me ... little-endian to machine-endian
+#ifdef WORDS_BIGENDIAN
+#define be2me_16(x) (x)
+#define be2me_32(x) (x)
+#define be2me_64(x) (x)
+#define le2me_16(x) bswap_16(x)
+#define le2me_32(x) bswap_32(x)
+#define le2me_64(x) bswap_64(x)
+#else
+#define be2me_16(x) bswap_16(x)
+#define be2me_32(x) bswap_32(x)
+#define be2me_64(x) bswap_64(x)
+#define le2me_16(x) (x)
+#define le2me_32(x) (x)
+#define le2me_64(x) (x)
+#endif
+#endif /* AVUTIL_BSWAP_H */
diff --git a/apps/codecs/libatrac/libavutil/common.h b/apps/codecs/libatrac/libavutil/common.h
new file mode 100644
index 0000000000..949f093d35
--- /dev/null
+++ b/apps/codecs/libatrac/libavutil/common.h
@@ -0,0 +1,286 @@
+/*
+ * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/**
+ * @file libavutil/common.h
+ * common internal and external API header
+ */
+#ifndef AVUTIL_COMMON_H
+#define AVUTIL_COMMON_H
+#include <ctype.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#ifdef __GNUC__
+#    define AV_GCC_VERSION_AT_LEAST(x,y) (__GNUC__ > x || __GNUC__ == x && __GNUC_MINOR__ >= y)
+#else
+#    define AV_GCC_VERSION_AT_LEAST(x,y) 0
+#endif
+#ifndef av_always_inline
+#if AV_GCC_VERSION_AT_LEAST(3,1)
+#    define av_always_inline __attribute__((always_inline)) inline
+#else
+#    define av_always_inline inline
+#endif
+#endif
+#ifndef av_noinline
+#if AV_GCC_VERSION_AT_LEAST(3,1)
+#    define av_noinline __attribute__((noinline))
+#else
+#    define av_noinline
+#endif
+#endif
+#ifndef av_pure
+#if AV_GCC_VERSION_AT_LEAST(3,1)
+#    define av_pure __attribute__((pure))
+#else
+#    define av_pure
+#endif
+#endif
+#ifndef av_const
+#if AV_GCC_VERSION_AT_LEAST(2,6)
+#    define av_const __attribute__((const))
+#else
+#    define av_const
+#endif
+#endif
+#ifndef av_cold
+#if (!defined(__ICC) || __ICC > 1100) && AV_GCC_VERSION_AT_LEAST(4,3)
+#    define av_cold __attribute__((cold))
+#else
+#    define av_cold
+#endif
+#endif
+#ifndef av_flatten
+#if AV_GCC_VERSION_AT_LEAST(4,1)
+#    define av_flatten __attribute__((flatten))
+#else
+#    define av_flatten
+#endif
+#endif
+#ifndef attribute_deprecated
+#if AV_GCC_VERSION_AT_LEAST(3,1)
+#    define attribute_deprecated __attribute__((deprecated))
+#else
+#    define attribute_deprecated
+#endif
+#endif
+#ifndef av_unused
+#if defined(__GNUC__)
+#    define av_unused __attribute__((unused))
+#else
+#    define av_unused
+#endif
+#endif
+#ifndef av_uninit
+#if defined(__GNUC__) && !defined(__ICC)
+#    define av_uninit(x) x=x
+#else
+#    define av_uninit(x) x
+#endif
+#endif
+//rounded division & shift
+#define RSHIFT(a,b) ((a) > 0 ? ((a) + ((1<<(b))>>1))>>(b) : ((a) + ((1<<(b))>>1)-1)>>(b))
+/* assume b>0 */
+#define ROUNDED_DIV(a,b) (((a)>0 ? (a) + ((b)>>1) : (a) - ((b)>>1))/(b))
+#define FFABS(a) ((a) >= 0 ? (a) : (-(a)))
+#define FFSIGN(a) ((a) > 0 ? 1 : -1)
+#define FFMAX(a,b) ((a) > (b) ? (a) : (b))
+#define FFMAX3(a,b,c) FFMAX(FFMAX(a,b),c)
+#define FFMIN(a,b) ((a) > (b) ? (b) : (a))
+#define FFMIN3(a,b,c) FFMIN(FFMIN(a,b),c)
+#define FFSWAP(type,a,b) do{type SWAP_tmp= b; b= a; a= SWAP_tmp;}while(0)
+#define FF_ARRAY_ELEMS(a) (sizeof(a) / sizeof((a)[0]))
+/* misc math functions */
+extern const uint8_t ff_log2_tab[256];
+static inline av_const int av_log2(unsigned int v)
+{
+    int n = 0;
+    if (v & 0xffff0000) {
+        v >>= 16;
+        n += 16;
+    }
+    if (v & 0xff00) {
+        v >>= 8;
+        n += 8;
+    }
+    n += ff_log2_tab[v];
+    return n;
+}
+static inline av_const int av_log2_16bit(unsigned int v)
+{
+    int n = 0;
+    if (v & 0xff00) {
+        v >>= 8;
+        n += 8;
+    }
+    n += ff_log2_tab[v];
+    return n;
+}
+/**
+ * Clips a signed integer value into the amin-amax range.
+ * @param a value to clip
+ * @param amin minimum value of the clip range
+ * @param amax maximum value of the clip range
+ * @return clipped value
+ */
+static inline av_const int av_clip(int a, int amin, int amax)
+{
+    if      (a < amin) return amin;
+    else if (a > amax) return amax;
+    else               return a;
+}
+/**
+ * Clips a signed integer value into the 0-255 range.
+ * @param a value to clip
+ * @return clipped value
+ */
+static inline av_const uint8_t av_clip_uint8(int a)
+{
+    if (a&(~255)) return (-a)>>31;
+    else          return a;
+}
+/**
+ * Clips a signed integer value into the -32768,32767 range.
+ * @param a value to clip
+ * @return clipped value
+ */
+static inline av_const int16_t av_clip_int16(int a)
+{
+    if ((a+32768) & ~65535) return (a>>31) ^ 32767;
+    else                    return a;
+}
+/**
+ * Clips a float value into the amin-amax range.
+ * @param a value to clip
+ * @param amin minimum value of the clip range
+ * @param amax maximum value of the clip range
+ * @return clipped value
+ */
+static inline av_const float av_clipf(float a, float amin, float amax)
+{
+    if      (a < amin) return amin;
+    else if (a > amax) return amax;
+    else               return a;
+}
+#define MKTAG(a,b,c,d) (a | (b << 8) | (c << 16) | (d << 24))
+#define MKBETAG(a,b,c,d) (d | (c << 8) | (b << 16) | (a << 24))
+/*!
+ * \def GET_UTF8(val, GET_BYTE, ERROR)
+ * Converts a UTF-8 character (up to 4 bytes long) to its 32-bit UCS-4 encoded form
+ * \param val is the output and should be of type uint32_t. It holds the converted
+ * UCS-4 character and should be a left value.
+ * \param GET_BYTE gets UTF-8 encoded bytes from any proper source. It can be
+ * a function or a statement whose return value or evaluated value is of type
+ * uint8_t. It will be executed up to 4 times for values in the valid UTF-8 range,
+ * and up to 7 times in the general case.
+ * \param ERROR action that should be taken when an invalid UTF-8 byte is returned
+ * from GET_BYTE. It should be a statement that jumps out of the macro,
+ * like exit(), goto, return, break, or continue.
+ */
+#define GET_UTF8(val, GET_BYTE, ERROR)\
+    val= GET_BYTE;\
+    {\
+        int ones= 7 - av_log2(val ^ 255);\
+        if(ones==1)\
+            ERROR\
+        val&= 127>>ones;\
+        while(--ones > 0){\
+            int tmp= GET_BYTE - 128;\
+            if(tmp>>6)\
+                ERROR\
+            val= (val<<6) + tmp;\
+        }\
+    }
+/*!
+ * \def PUT_UTF8(val, tmp, PUT_BYTE)
+ * Converts a 32-bit Unicode character to its UTF-8 encoded form (up to 4 bytes long).
+ * \param val is an input-only argument and should be of type uint32_t. It holds
+ * a UCS-4 encoded Unicode character that is to be converted to UTF-8. If
+ * val is given as a function it is executed only once.
+ * \param tmp is a temporary variable and should be of type uint8_t. It
+ * represents an intermediate value during conversion that is to be
+ * output by PUT_BYTE.
+ * \param PUT_BYTE writes the converted UTF-8 bytes to any proper destination.
+ * It could be a function or a statement, and uses tmp as the input byte.
+ * For example, PUT_BYTE could be "*output++ = tmp;" PUT_BYTE will be
+ * executed up to 4 times for values in the valid UTF-8 range and up to
+ * 7 times in the general case, depending on the length of the converted
+ * Unicode character.
+ */
+#define PUT_UTF8(val, tmp, PUT_BYTE)\
+    {\
+        int bytes, shift;\
+        uint32_t in = val;\
+        if (in < 0x80) {\
+            tmp = in;\
+            PUT_BYTE\
+        } else {\
+            bytes = (av_log2(in) + 4) / 5;\
+            shift = (bytes - 1) * 6;\
+            tmp = (256 - (256 >> bytes)) | (in >> shift);\
+            PUT_BYTE\
+            while (shift >= 6) {\
+                shift -= 6;\
+                tmp = 0x80 | ((in >> shift) & 0x3f);\
+                PUT_BYTE\
+            }\
+        }\
+    }
+#include "mem.h"
+//#ifdef HAVE_AV_CONFIG_H
+//#    include "ffmpeg_config.h"
+#    include "internal.h"
+//#endif /* HAVE_AV_CONFIG_H */
+#endif /* AVUTIL_COMMON_H */
diff --git a/apps/codecs/libatrac/libavutil/internal.h b/apps/codecs/libatrac/libavutil/internal.h
new file mode 100644
index 0000000000..c28b6f9a5e
--- /dev/null
+++ b/apps/codecs/libatrac/libavutil/internal.h
@@ -0,0 +1,328 @@
+/*
+ * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/**
+ * @file libavutil/internal.h
+ * common internal API header
+ */
+#ifndef AVUTIL_INTERNAL_H
+#define AVUTIL_INTERNAL_H
+#if !defined(DEBUG) && !defined(NDEBUG)
+#    define NDEBUG
+#endif
+#include <limits.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <assert.h>
+//#include "ffmpeg_config.h"
+#include "common.h"
+#include "mem.h"
+//#include "timer.h"
+#ifndef attribute_align_arg
+#if (!defined(__ICC) || __ICC > 1100) && AV_GCC_VERSION_AT_LEAST(4,2)
+#    define attribute_align_arg __attribute__((force_align_arg_pointer))
+#else
+#    define attribute_align_arg
+#endif
+#endif
+#ifndef attribute_used
+#if AV_GCC_VERSION_AT_LEAST(3,1)
+#    define attribute_used __attribute__((used))
+#else
+#    define attribute_used
+#endif
+#endif
+#ifndef INT16_MIN
+#define INT16_MIN       (-0x7fff-1)
+#endif
+#ifndef INT16_MAX
+#define INT16_MAX       0x7fff
+#endif
+#ifndef INT32_MIN
+#define INT32_MIN       (-0x7fffffff-1)
+#endif
+#ifndef INT32_MAX
+#define INT32_MAX       0x7fffffff
+#endif
+#ifndef UINT32_MAX
+#define UINT32_MAX      0xffffffff
+#endif
+#ifndef INT64_MIN
+#define INT64_MIN       (-0x7fffffffffffffffLL-1)
+#endif
+#ifndef INT64_MAX
+#define INT64_MAX INT64_C(9223372036854775807)
+#endif
+#ifndef UINT64_MAX
+#define UINT64_MAX UINT64_C(0xFFFFFFFFFFFFFFFF)
+#endif
+#ifndef INT_BIT
+#    define INT_BIT (CHAR_BIT * sizeof(int))
+#endif
+#if ( defined(__PIC__) || defined(__pic__) ) && ! defined(PIC)
+#    define PIC
+#endif
+#ifndef offsetof
+#    define offsetof(T,F) ((unsigned int)((char *)&((T *)0)->F))
+#endif
+// Use rip-relative addressing if compiling PIC code on x86-64.
+#if ARCH_X86_64 && defined(PIC)
+#    define LOCAL_MANGLE(a) #a "(%%rip)"
+#else
+#    define LOCAL_MANGLE(a) #a
+#endif
+#define MANGLE(a) EXTERN_PREFIX LOCAL_MANGLE(a)
+/* debug stuff */
+/* dprintf macros */
+#ifdef DEBUG
+#    define dprintf(pctx, ...) av_log(pctx, AV_LOG_DEBUG, __VA_ARGS__)
+#else
+#    define dprintf(pctx, ...)
+#endif
+#define av_abort()      do { av_log(NULL, AV_LOG_ERROR, "Abort at %s:%d\n", __FILE__, __LINE__); abort(); } while (0)
+/* math */
+extern const uint32_t ff_inverse[256];
+#if ARCH_X86
+#    define FASTDIV(a,b) \
+    ({\
+        int ret,dmy;\
+        __asm__ volatile(\
+            "mull %3"\
+            :"=d"(ret),"=a"(dmy)\
+            :"1"(a),"g"(ff_inverse[b])\
+            );\
+        ret;\
+    })
+#elif HAVE_ARMV6 && HAVE_INLINE_ASM
+static inline av_const int FASTDIV(int a, int b)
+{
+    int r, t;
+    __asm__ volatile("cmp     %3, #2               \n\t"
+                     "ldr     %1, [%4, %3, lsl #2] \n\t"
+                     "lsrle   %0, %2, #1           \n\t"
+                     "smmulgt %0, %1, %2           \n\t"
+                     : "=&r"(r), "=&r"(t) : "r"(a), "r"(b), "r"(ff_inverse));
+    return r;
+}
+#elif ARCH_ARM && HAVE_INLINE_ASM
+static inline av_const int FASTDIV(int a, int b)
+{
+    int r, t;
+    __asm__ volatile ("umull %1, %0, %2, %3"
+                      : "=&r"(r), "=&r"(t) : "r"(a), "r"(ff_inverse[b]));
+    return r;
+}
+#elif CONFIG_FASTDIV
+#    define FASTDIV(a,b)   ((uint32_t)((((uint64_t)a)*ff_inverse[b])>>32))
+#else
+#    define FASTDIV(a,b)   ((a)/(b))
+#endif
+extern const uint8_t ff_sqrt_tab[256];
+static inline av_const unsigned int ff_sqrt(unsigned int a)
+{
+    unsigned int b;
+    if(a<255) return (ff_sqrt_tab[a+1]-1)>>4;
+    else if(a<(1<<12)) b= ff_sqrt_tab[a>>4 ]>>2;
+#if !CONFIG_SMALL
+    else if(a<(1<<14)) b= ff_sqrt_tab[a>>6 ]>>1;
+    else if(a<(1<<16)) b= ff_sqrt_tab[a>>8 ]   ;
+#endif
+    else{
+        int s= av_log2_16bit(a>>16)>>1;
+        unsigned int c= a>>(s+2);
+        b= ff_sqrt_tab[c>>(s+8)];
+        b= FASTDIV(c,b) + (b<<s);
+    }
+    return b - (a<b*b);
+}
+#if ARCH_X86
+#define MASK_ABS(mask, level)\
+            __asm__ volatile(\
+                "cltd                   \n\t"\
+                "xorl %1, %0            \n\t"\
+                "subl %1, %0            \n\t"\
+                : "+a" (level), "=&d" (mask)\
+            );
+#else
+#define MASK_ABS(mask, level)\
+            mask= level>>31;\
+            level= (level^mask)-mask;
+#endif
+#if HAVE_CMOV
+#define COPY3_IF_LT(x,y,a,b,c,d)\
+__asm__ volatile (\
+    "cmpl %0, %3        \n\t"\
+    "cmovl %3, %0       \n\t"\
+    "cmovl %4, %1       \n\t"\
+    "cmovl %5, %2       \n\t"\
+    : "+&r" (x), "+&r" (a), "+r" (c)\
+    : "r" (y), "r" (b), "r" (d)\
+);
+#else
+#define COPY3_IF_LT(x,y,a,b,c,d)\
+if((y)<(x)){\
+     (x)=(y);\
+     (a)=(b);\
+     (c)=(d);\
+}
+#endif
+/* avoid usage of dangerous/inappropriate system functions */
+#undef  malloc
+#define malloc please_use_av_malloc
+#undef  free
+#define free please_use_av_free
+#undef  realloc
+#define realloc please_use_av_realloc
+#undef  time
+#define time time_is_forbidden_due_to_security_issues
+//#undef  rand
+//#define rand rand_is_forbidden_due_to_state_trashing_use_av_random
+//#undef  srand
+//#define srand srand_is_forbidden_due_to_state_trashing_use_av_random_init
+#undef  random
+#define random random_is_forbidden_due_to_state_trashing_use_av_random
+#undef  sprintf
+#define sprintf sprintf_is_forbidden_due_to_security_issues_use_snprintf
+#undef  strcat
+#define strcat strcat_is_forbidden_due_to_security_issues_use_av_strlcat
+#undef  exit
+#define exit exit_is_forbidden
+#ifndef LIBAVFORMAT_BUILD
+//#undef  printf
+//#define printf please_use_av_log_instead_of_printf
+#undef  fprintf
+#define fprintf please_use_av_log_instead_of_fprintf
+#undef  puts
+#define puts please_use_av_log_instead_of_puts
+#undef  perror
+#define perror please_use_av_log_instead_of_perror
+#endif
+#define CHECKED_ALLOCZ(p, size)\
+{\
+    p= av_mallocz(size);\
+    if(p==NULL && (size)!=0){\
+        av_log(NULL, AV_LOG_ERROR, "Cannot allocate memory.");\
+        goto fail;\
+    }\
+}
+#if defined(__ICC) || defined(__SUNPRO_C)
+    #define DECLARE_ALIGNED(n,t,v)      t v __attribute__ ((aligned (n)))
+    #define DECLARE_ASM_CONST(n,t,v)    const t __attribute__ ((aligned (n))) v
+#elif defined(__GNUC__)
+    #define DECLARE_ALIGNED(n,t,v)      t v __attribute__ ((aligned (n)))
+    #define DECLARE_ASM_CONST(n,t,v)    static const t v attribute_used __attribute__ ((aligned (n)))
+#elif defined(_MSC_VER)
+    #define DECLARE_ALIGNED(n,t,v)      __declspec(align(n)) t v
+    #define DECLARE_ASM_CONST(n,t,v)    __declspec(align(n)) static const t v
+#elif HAVE_INLINE_ASM
+    #error The asm code needs alignment, but we do not know how to do it for this compiler.
+#else
+    #define DECLARE_ALIGNED(n,t,v)      t v
+    #define DECLARE_ASM_CONST(n,t,v)    static const t v
+#endif
+#if !HAVE_LLRINT
+static av_always_inline av_const long long llrint(double x)
+{
+    return rint(x);
+}
+#endif /* HAVE_LLRINT */
+#if !HAVE_LRINT
+static av_always_inline av_const long int lrint(double x)
+{
+    return rint(x);
+}
+#endif /* HAVE_LRINT */
+#if !HAVE_LRINTF
+static av_always_inline av_const long int lrintf(float x)
+{
+    return (int)(rint(x));
+}
+#endif /* HAVE_LRINTF */
+#if !HAVE_ROUND
+static av_always_inline av_const double round(double x)
+{
+    return (x > 0) ? floor(x + 0.5) : ceil(x - 0.5);
+}
+#endif /* HAVE_ROUND */
+#if !HAVE_ROUNDF
+static av_always_inline av_const float roundf(float x)
+{
+    return (x > 0) ? floor(x + 0.5) : ceil(x - 0.5);
+}
+#endif /* HAVE_ROUNDF */
+#if !HAVE_TRUNCF
+static av_always_inline av_const float truncf(float x)
+{
+    return (x > 0) ? floor(x) : ceil(x);
+}
+#endif /* HAVE_TRUNCF */
+/**
+ * Returns NULL if CONFIG_SMALL is true, otherwise the argument
+ * without modification. Used to disable the definition of strings
+ * (for example AVCodec long_names).
+ */
+#if CONFIG_SMALL
+#   define NULL_IF_CONFIG_SMALL(x) NULL
+#else
+#   define NULL_IF_CONFIG_SMALL(x) x
+#endif
+#endif /* AVUTIL_INTERNAL_H */
diff --git a/apps/codecs/libatrac/libavutil/intreadwrite.h b/apps/codecs/libatrac/libavutil/intreadwrite.h
new file mode 100644
index 0000000000..d27a50061e
--- /dev/null
+++ b/apps/codecs/libatrac/libavutil/intreadwrite.h
@@ -0,0 +1,192 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef AVUTIL_INTREADWRITE_H
+#define AVUTIL_INTREADWRITE_H
+#include <stdint.h>
+//#include "ffmpeg_config.h"
+#include "bswap.h"
+#ifdef __GNUC__
+struct unaligned_64 { uint64_t l; } __attribute__((packed));
+struct unaligned_32 { uint32_t l; } __attribute__((packed));
+struct unaligned_16 { uint16_t l; } __attribute__((packed));
+#define AV_RN16(a) (((const struct unaligned_16 *) (a))->l)
+#define AV_RN32(a) (((const struct unaligned_32 *) (a))->l)
+#define AV_RN64(a) (((const struct unaligned_64 *) (a))->l)
+#define AV_WN16(a, b) (((struct unaligned_16 *) (a))->l) = (b)
+#define AV_WN32(a, b) (((struct unaligned_32 *) (a))->l) = (b)
+#define AV_WN64(a, b) (((struct unaligned_64 *) (a))->l) = (b)
+#elif defined(__DECC)
+#define AV_RN16(a) (*((const __unaligned uint16_t*)(a)))
+#define AV_RN32(a) (*((const __unaligned uint32_t*)(a)))
+#define AV_RN64(a) (*((const __unaligned uint64_t*)(a)))
+#define AV_WN16(a, b) *((__unaligned uint16_t*)(a)) = (b)
+#define AV_WN32(a, b) *((__unaligned uint32_t*)(a)) = (b)
+#define AV_WN64(a, b) *((__unaligned uint64_t*)(a)) = (b)
+#else
+#define AV_RN16(a) (*((const uint16_t*)(a)))
+#define AV_RN32(a) (*((const uint32_t*)(a)))
+#define AV_RN64(a) (*((const uint64_t*)(a)))
+#define AV_WN16(a, b) *((uint16_t*)(a)) = (b)
+#define AV_WN32(a, b) *((uint32_t*)(a)) = (b)
+#define AV_WN64(a, b) *((uint64_t*)(a)) = (b)
+#endif /* !__GNUC__ */
+/* endian macros */
+#define AV_RB8(x)     (((const uint8_t*)(x))[0])
+#define AV_WB8(p, d)  do { ((uint8_t*)(p))[0] = (d); } while(0)
+#define AV_RL8(x)     AV_RB8(x)
+#define AV_WL8(p, d)  AV_WB8(p, d)
+#if HAVE_FAST_UNALIGNED
+# ifdef WORDS_BIGENDIAN
+#  define AV_RB16(x)    AV_RN16(x)
+#  define AV_WB16(p, d) AV_WN16(p, d)
+#  define AV_RL16(x)    bswap_16(AV_RN16(x))
+#  define AV_WL16(p, d) AV_WN16(p, bswap_16(d))
+#  define AV_RB32(x)    AV_RN32(x)
+#  define AV_WB32(p, d) AV_WN32(p, d)
+#  define AV_RL32(x)    bswap_32(AV_RN32(x))
+#  define AV_WL32(p, d) AV_WN32(p, bswap_32(d))
+#  define AV_RB64(x)    AV_RN64(x)
+#  define AV_WB64(p, d) AV_WN64(p, d)
+#  define AV_RL64(x)    bswap_64(AV_RN64(x))
+#  define AV_WL64(p, d) AV_WN64(p, bswap_64(d))
+# else /* WORDS_BIGENDIAN */
+#  define AV_RB16(x)    bswap_16(AV_RN16(x))
+#  define AV_WB16(p, d) AV_WN16(p, bswap_16(d))
+#  define AV_RL16(x)    AV_RN16(x)
+#  define AV_WL16(p, d) AV_WN16(p, d)
+#  define AV_RB32(x)    bswap_32(AV_RN32(x))
+#  define AV_WB32(p, d) AV_WN32(p, bswap_32(d))
+#  define AV_RL32(x)    AV_RN32(x)
+#  define AV_WL32(p, d) AV_WN32(p, d)
+#  define AV_RB64(x)    bswap_64(AV_RN64(x))
+#  define AV_WB64(p, d) AV_WN64(p, bswap_64(d))
+#  define AV_RL64(x)    AV_RN64(x)
+#  define AV_WL64(p, d) AV_WN64(p, d)
+# endif
+#else /* HAVE_FAST_UNALIGNED */
+#define AV_RB16(x)  ((((const uint8_t*)(x))[0] << 8) | ((const uint8_t*)(x))[1])
+#define AV_WB16(p, d) do { \
+                    ((uint8_t*)(p))[1] = (d); \
+                    ((uint8_t*)(p))[0] = (d)>>8; } while(0)
+#define AV_RL16(x)  ((((const uint8_t*)(x))[1] << 8) | \
+                      ((const uint8_t*)(x))[0])
+#define AV_WL16(p, d) do { \
+                    ((uint8_t*)(p))[0] = (d); \
+                    ((uint8_t*)(p))[1] = (d)>>8; } while(0)
+#define AV_RB32(x)  ((((const uint8_t*)(x))[0] << 24) | \
+                     (((const uint8_t*)(x))[1] << 16) | \
+                     (((const uint8_t*)(x))[2] <<  8) | \
+                      ((const uint8_t*)(x))[3])
+#define AV_WB32(p, d) do { \
+                    ((uint8_t*)(p))[3] = (d); \
+                    ((uint8_t*)(p))[2] = (d)>>8; \
+                    ((uint8_t*)(p))[1] = (d)>>16; \
+                    ((uint8_t*)(p))[0] = (d)>>24; } while(0)
+#define AV_RL32(x) ((((const uint8_t*)(x))[3] << 24) | \
+                    (((const uint8_t*)(x))[2] << 16) | \
+                    (((const uint8_t*)(x))[1] <<  8) | \
+                     ((const uint8_t*)(x))[0])
+#define AV_WL32(p, d) do { \
+                    ((uint8_t*)(p))[0] = (d); \
+                    ((uint8_t*)(p))[1] = (d)>>8; \
+                    ((uint8_t*)(p))[2] = (d)>>16; \
+                    ((uint8_t*)(p))[3] = (d)>>24; } while(0)
+#define AV_RB64(x)  (((uint64_t)((const uint8_t*)(x))[0] << 56) | \
+                     ((uint64_t)((const uint8_t*)(x))[1] << 48) | \
+                     ((uint64_t)((const uint8_t*)(x))[2] << 40) | \
+                     ((uint64_t)((const uint8_t*)(x))[3] << 32) | \
+                     ((uint64_t)((const uint8_t*)(x))[4] << 24) | \
+                     ((uint64_t)((const uint8_t*)(x))[5] << 16) | \
+                     ((uint64_t)((const uint8_t*)(x))[6] <<  8) | \
+                      (uint64_t)((const uint8_t*)(x))[7])
+#define AV_WB64(p, d) do { \
+                    ((uint8_t*)(p))[7] = (d);     \
+                    ((uint8_t*)(p))[6] = (d)>>8;  \
+                    ((uint8_t*)(p))[5] = (d)>>16; \
+                    ((uint8_t*)(p))[4] = (d)>>24; \
+                    ((uint8_t*)(p))[3] = (d)>>32; \
+                    ((uint8_t*)(p))[2] = (d)>>40; \
+                    ((uint8_t*)(p))[1] = (d)>>48; \
+                    ((uint8_t*)(p))[0] = (d)>>56; } while(0)
+#define AV_RL64(x)  (((uint64_t)((const uint8_t*)(x))[7] << 56) | \
+                     ((uint64_t)((const uint8_t*)(x))[6] << 48) | \
+                     ((uint64_t)((const uint8_t*)(x))[5] << 40) | \
+                     ((uint64_t)((const uint8_t*)(x))[4] << 32) | \
+                     ((uint64_t)((const uint8_t*)(x))[3] << 24) | \
+                     ((uint64_t)((const uint8_t*)(x))[2] << 16) | \
+                     ((uint64_t)((const uint8_t*)(x))[1] <<  8) | \
+                      (uint64_t)((const uint8_t*)(x))[0])
+#define AV_WL64(p, d) do { \
+                    ((uint8_t*)(p))[0] = (d);     \
+                    ((uint8_t*)(p))[1] = (d)>>8;  \
+                    ((uint8_t*)(p))[2] = (d)>>16; \
+                    ((uint8_t*)(p))[3] = (d)>>24; \
+                    ((uint8_t*)(p))[4] = (d)>>32; \
+                    ((uint8_t*)(p))[5] = (d)>>40; \
+                    ((uint8_t*)(p))[6] = (d)>>48; \
+                    ((uint8_t*)(p))[7] = (d)>>56; } while(0)
+#endif  /* HAVE_FAST_UNALIGNED */
+#define AV_RB24(x)  ((((const uint8_t*)(x))[0] << 16) | \
+                     (((const uint8_t*)(x))[1] <<  8) | \
+                      ((const uint8_t*)(x))[2])
+#define AV_WB24(p, d) do { \
+                    ((uint8_t*)(p))[2] = (d); \
+                    ((uint8_t*)(p))[1] = (d)>>8; \
+                    ((uint8_t*)(p))[0] = (d)>>16; } while(0)
+#define AV_RL24(x)  ((((const uint8_t*)(x))[2] << 16) | \
+                     (((const uint8_t*)(x))[1] <<  8) | \
+                      ((const uint8_t*)(x))[0])
+#define AV_WL24(p, d) do { \
+                    ((uint8_t*)(p))[0] = (d); \
+                    ((uint8_t*)(p))[1] = (d)>>8; \
+                    ((uint8_t*)(p))[2] = (d)>>16; } while(0)
+#endif /* AVUTIL_INTREADWRITE_H */
diff --git a/apps/codecs/libatrac/libavutil/log.c b/apps/codecs/libatrac/libavutil/log.c
new file mode 100644
index 0000000000..4bb9652c2c
--- /dev/null
+++ b/apps/codecs/libatrac/libavutil/log.c
@@ -0,0 +1,89 @@
+/*
+ * log functions
+ * Copyright (c) 2003 Michel Bardiaux
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/**
+ * @file libavutil/log.c
+ * logging functions
+ */
+#include "avutil.h"
+#include "log.h"
+int av_log_level = AV_LOG_INFO;
+void av_log_default_callback(void* ptr, int level, const char* fmt, va_list vl)
+{
+    static int print_prefix=1;
+    static int count;
+    static char line[1024], prev[1024];
+    AVClass* avc= ptr ? *(AVClass**)ptr : NULL;
+    if(level>av_log_level)
+        return;
+#undef fprintf
+    if(print_prefix && avc) {
+        snprintf(line, sizeof(line), "[%s @ %p]", avc->item_name(ptr), ptr);
+    }else
+        line[0]=0;
+    vsnprintf(line + strlen(line), sizeof(line) - strlen(line), fmt, vl);
+    print_prefix= line[strlen(line)-1] == '\n';
+    if(print_prefix && !strcmp(line, prev)){
+        count++;
+        return;
+    }
+    if(count>0){
+        fprintf(stderr, "    Last message repeated %d times\n", count);
+        count=0;
+    }
+    fputs(line, stderr);
+    strcpy(prev, line);
+}
+static void (*av_log_callback)(void*, int, const char*, va_list) = av_log_default_callback;
+void av_log(void* avcl, int level, const char *fmt, ...)
+{
+    va_list vl;
+    va_start(vl, fmt);
+    av_vlog(avcl, level, fmt, vl);
+    va_end(vl);
+}
+void av_vlog(void* avcl, int level, const char *fmt, va_list vl)
+{
+    av_log_callback(avcl, level, fmt, vl);
+}
+int av_log_get_level(void)
+{
+    return av_log_level;
+}
+void av_log_set_level(int level)
+{
+    av_log_level = level;
+}
+void av_log_set_callback(void (*callback)(void*, int, const char*, va_list))
+{
+    av_log_callback = callback;
+}
diff --git a/apps/codecs/libatrac/libavutil/log.h b/apps/codecs/libatrac/libavutil/log.h
new file mode 100644
index 0000000000..1206a2fc38
--- /dev/null
+++ b/apps/codecs/libatrac/libavutil/log.h
@@ -0,0 +1,116 @@
+/*
+ * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef AVUTIL_LOG_H
+#define AVUTIL_LOG_H
+#include <stdarg.h>
+#include "avutil.h"
+/**
+ * Describes the class of an AVClass context structure. That is an
+ * arbitrary struct of which the first field is a pointer to an
+ * AVClass struct (e.g. AVCodecContext, AVFormatContext etc.).
+ */
+typedef struct AVCLASS AVClass;
+struct AVCLASS {
+    /**
+     * The name of the class; usually it is the same name as the
+     * context structure type to which the AVClass is associated.
+     */
+    const char* class_name;
+    /**
+     * A pointer to a function which returns the name of a context
+     * instance \p ctx associated with the class.
+     */
+    const char* (*item_name)(void* ctx);
+    /**
+     * a pointer to the first option specified in the class if any or NULL
+     *
+     * @see av_set_default_options()
+     */
+    const struct AVOption *option;
+};
+/* av_log API */
+#define AV_LOG_QUIET    -8
+/**
+ * Something went really wrong and we will crash now.
+ */
+#define AV_LOG_PANIC     0
+/**
+ * Something went wrong and recovery is not possible.
+ * For example, no header was found for a format which depends
+ * on headers or an illegal combination of parameters is used.
+ */
+#define AV_LOG_FATAL     8
+/**
+ * Something went wrong and cannot losslessly be recovered.
+ * However, not all future data is affected.
+ */
+#define AV_LOG_ERROR    16
+/**
+ * Something somehow does not look correct. This may or may not
+ * lead to problems. An example would be the use of '-vstrict -2'.
+ */
+#define AV_LOG_WARNING  24
+#define AV_LOG_INFO     32
+#define AV_LOG_VERBOSE  40
+/**
+ * Stuff which is only useful for libav* developers.
+ */
+#define AV_LOG_DEBUG    48
+/**
+ * Sends the specified message to the log if the level is less than or equal
+ * to the current av_log_level. By default, all logging messages are sent to
+ * stderr. This behavior can be altered by setting a different av_vlog callback
+ * function.
+ *
+ * @param avcl A pointer to an arbitrary struct of which the first field is a
+ * pointer to an AVClass struct.
+ * @param level The importance level of the message, lower values signifying
+ * higher importance.
+ * @param fmt The format string (printf-compatible) that specifies how
+ * subsequent arguments are converted to output.
+ * @see av_vlog
+ */
+#ifdef __GNUC__
+void av_log(void*, int level, const char *fmt, ...) __attribute__ ((__format__ (__printf__, 3, 4)));
+#else
+void av_log(void*, int level, const char *fmt, ...);
+#endif
+void av_vlog(void*, int level, const char *fmt, va_list);
+int av_log_get_level(void);
+void av_log_set_level(int);
+void av_log_set_callback(void (*)(void*, int, const char*, va_list));
+void av_log_default_callback(void* ptr, int level, const char* fmt, va_list vl);
+#endif /* AVUTIL_LOG_H */
diff --git a/apps/codecs/libatrac/libavutil/mem.c b/apps/codecs/libatrac/libavutil/mem.c
new file mode 100644
index 0000000000..9721222e50
--- /dev/null
+++ b/apps/codecs/libatrac/libavutil/mem.c
@@ -0,0 +1,159 @@
+/*
+ * default memory allocator for libavutil
+ * Copyright (c) 2002 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/**
+ * @file libavutil/mem.c
+ * default memory allocator for libavutil
+ */
+//#include "ffmpeg_config.h"
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#if HAVE_MALLOC_H
+#include <malloc.h>
+#endif
+#include "mem.h"
+/* here we can use OS-dependent allocation functions */
+#undef free
+#undef malloc
+#undef realloc
+/* You can redefine av_malloc and av_free in your project to use your
+   memory allocator. You do not need to suppress this file because the
+   linker will do it automatically. */
+void *av_malloc(unsigned int size)
+{
+    void *ptr = NULL;
+#if CONFIG_MEMALIGN_HACK
+    long diff;
+#endif
+    /* let's disallow possible ambiguous cases */
+    if(size > (INT_MAX-16) )
+        return NULL;
+#if CONFIG_MEMALIGN_HACK
+    ptr = malloc(size+16);
+    if(!ptr)
+        return ptr;
+    diff= ((-(long)ptr - 1)&15) + 1;
+    ptr = (char*)ptr + diff;
+    ((char*)ptr)[-1]= diff;
+#elif HAVE_POSIX_MEMALIGN
+    if (posix_memalign(&ptr,16,size))
+        ptr = NULL;
+#elif HAVE_MEMALIGN
+    ptr = memalign(16,size);
+    /* Why 64?
+       Indeed, we should align it:
+         on 4 for 386
+         on 16 for 486
+         on 32 for 586, PPro - K6-III
+         on 64 for K7 (maybe for P3 too).
+       Because L1 and L2 caches are aligned on those values.
+       But I don't want to code such logic here!
+     */
+     /* Why 16?
+        Because some CPUs need alignment, for example SSE2 on P4, & most RISC CPUs
+        it will just trigger an exception and the unaligned load will be done in the
+        exception handler or it will just segfault (SSE2 on P4).
+        Why not larger? Because I did not see a difference in benchmarks ...
+     */
+     /* benchmarks with P3
+        memalign(64)+1          3071,3051,3032
+        memalign(64)+2          3051,3032,3041
+        memalign(64)+4          2911,2896,2915
+        memalign(64)+8          2545,2554,2550
+        memalign(64)+16         2543,2572,2563
+        memalign(64)+32         2546,2545,2571
+        memalign(64)+64         2570,2533,2558
+        BTW, malloc seems to do 8-byte alignment by default here.
+     */
+#else
+    ptr = malloc(size);
+#endif
+    return ptr;
+}
+void *av_realloc(void *ptr, unsigned int size)
+{
+#if CONFIG_MEMALIGN_HACK
+    int diff;
+#endif
+    /* let's disallow possible ambiguous cases */
+    if(size > (INT_MAX-16) )
+        return NULL;
+#if CONFIG_MEMALIGN_HACK
+    //FIXME this isn't aligned correctly, though it probably isn't needed
+    if(!ptr) return av_malloc(size);
+    diff= ((char*)ptr)[-1];
+    return (char*)realloc((char*)ptr - diff, size + diff) + diff;
+#else
+    return realloc(ptr, size);
+#endif
+}
+void av_free(void *ptr)
+{
+    /* XXX: this test should not be needed on most libcs */
+    if (ptr)
+#if CONFIG_MEMALIGN_HACK
+        free((char*)ptr - ((char*)ptr)[-1]);
+#else
+        free(ptr);
+#endif
+}
+void av_freep(void *arg)
+{
+    void **ptr= (void**)arg;
+    av_free(*ptr);
+    *ptr = NULL;
+}
+void *av_mallocz(unsigned int size)
+{
+    void *ptr = av_malloc(size);
+    if (ptr)
+        memset(ptr, 0, size);
+    return ptr;
+}
+char *av_strdup(const char *s)
+{
+    char *ptr= NULL;
+    if(s){
+        int len = strlen(s) + 1;
+        ptr = av_malloc(len);
+        if (ptr)
+            memcpy(ptr, s, len);
+    }
+    return ptr;
+}
diff --git a/apps/codecs/libatrac/libavutil/mem.h b/apps/codecs/libatrac/libavutil/mem.h
new file mode 100644
index 0000000000..e50553aefe
--- /dev/null
+++ b/apps/codecs/libatrac/libavutil/mem.h
@@ -0,0 +1,104 @@
+/*
+ * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/**
+ * @file libavutil/mem.h
+ * memory handling functions
+ */
+#ifndef AVUTIL_MEM_H
+#define AVUTIL_MEM_H
+#include "common.h"
+#if AV_GCC_VERSION_AT_LEAST(3,1)
+    #define av_malloc_attrib __attribute__((__malloc__))
+#else
+    #define av_malloc_attrib
+#endif
+#if (!defined(__ICC) || __ICC > 1100) && AV_GCC_VERSION_AT_LEAST(4,3)
+    #define av_alloc_size(n) __attribute__((alloc_size(n)))
+#else
+    #define av_alloc_size(n)
+#endif
+/**
+ * Allocates a block of \p size bytes with alignment suitable for all
+ * memory accesses (including vectors if available on the CPU).
+ * @param size Size in bytes for the memory block to be allocated.
+ * @return Pointer to the allocated block, NULL if the block cannot
+ * be allocated.
+ * @see av_mallocz()
+ */
+void *av_malloc(unsigned int size) av_malloc_attrib av_alloc_size(1);
+/**
+ * Allocates or reallocates a block of memory.
+ * If \p ptr is NULL and \p size > 0, allocates a new block. If \p
+ * size is zero, frees the memory block pointed to by \p ptr.
+ * @param size Size in bytes for the memory block to be allocated or
+ * reallocated.
+ * @param ptr Pointer to a memory block already allocated with
+ * av_malloc(z)() or av_realloc() or NULL.
+ * @return Pointer to a newly reallocated block or NULL if the block
+ * cannot be reallocated or the function is used to free the memory block.
+ * @see av_fast_realloc()
+ */
+void *av_realloc(void *ptr, unsigned int size) av_alloc_size(2);
+/**
+ * Frees a memory block which has been allocated with av_malloc(z)() or
+ * av_realloc().
+ * @param ptr Pointer to the memory block which should be freed.
+ * @note ptr = NULL is explicitly allowed.
+ * @note It is recommended that you use av_freep() instead.
+ * @see av_freep()
+ */
+void av_free(void *ptr);
+/**
+ * Allocates a block of \p size bytes with alignment suitable for all
+ * memory accesses (including vectors if available on the CPU) and
+ * zeroes all the bytes of the block.
+ * @param size Size in bytes for the memory block to be allocated.
+ * @return Pointer to the allocated block, NULL if it cannot be allocated.
+ * @see av_malloc()
+ */
+void *av_mallocz(unsigned int size) av_malloc_attrib av_alloc_size(1);
+/**
+ * Duplicates the string \p s.
+ * @param s string to be duplicated
+ * @return Pointer to a newly allocated string containing a
+ * copy of \p s or NULL if the string cannot be allocated.
+ */
+char *av_strdup(const char *s) av_malloc_attrib;
+/**
+ * Frees a memory block which has been allocated with av_malloc(z)() or
+ * av_realloc() and set the pointer pointing to it to NULL.
+ * @param ptr Pointer to the pointer to the memory block which should
+ * be freed.
+ * @see av_free()
+ */
+void av_freep(void *ptr);
+#endif /* AVUTIL_MEM_H */
diff --git a/apps/codecs/libatrac/mdct.c b/apps/codecs/libatrac/mdct.c
new file mode 100644
index 0000000000..670b6d381e
--- /dev/null
+++ b/apps/codecs/libatrac/mdct.c
@@ -0,0 +1,245 @@
+/*
+ * MDCT/IMDCT transforms
+ * Copyright (c) 2002 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "dsputil.h"
+#ifndef M_E
+#define M_E            2.7182818284590452354   /* e */
+#endif
+#ifndef M_LN2
+#define M_LN2          0.69314718055994530942  /* log_e 2 */
+#endif
+#ifndef M_LN10
+#define M_LN10         2.30258509299404568402  /* log_e 10 */
+#endif
+#ifndef M_PI
+#define M_PI           3.14159265358979323846  /* pi */
+#endif
+#ifndef M_SQRT1_2
+#define M_SQRT1_2      0.70710678118654752440  /* 1/sqrt(2) */
+#endif
+/**
+ * @file libavcodec/mdct.c
+ * MDCT/IMDCT transforms.
+ */
+// Generate a Kaiser-Bessel Derived Window.
+#define BESSEL_I0_ITER 50 // default: 50 iterations of Bessel I0 approximation
+av_cold void ff_kbd_window_init(float *window, float alpha, int n)
+{
+   int i, j;
+   double sum = 0.0, bessel, tmp;
+   double local_window[n];
+   double alpha2 = (alpha * M_PI / n) * (alpha * M_PI / n);
+   for (i = 0; i < n; i++) {
+       tmp = i * (n - i) * alpha2;
+       bessel = 1.0;
+       for (j = BESSEL_I0_ITER; j > 0; j--)
+           bessel = bessel * tmp / (j * j) + 1;
+       sum += bessel;
+       local_window[i] = sum;
+   }
+   sum++;
+   for (i = 0; i < n; i++)
+       window[i] = sqrt(local_window[i] / sum);
+}
+DECLARE_ALIGNED(16, float, ff_sine_128 [ 128]);
+DECLARE_ALIGNED(16, float, ff_sine_256 [ 256]);
+DECLARE_ALIGNED(16, float, ff_sine_512 [ 512]);
+DECLARE_ALIGNED(16, float, ff_sine_1024[1024]);
+DECLARE_ALIGNED(16, float, ff_sine_2048[2048]);
+DECLARE_ALIGNED(16, float, ff_sine_4096[4096]);
+float *ff_sine_windows[6] = {
+    ff_sine_128, ff_sine_256, ff_sine_512, ff_sine_1024, ff_sine_2048, ff_sine_4096
+};
+// Generate a sine window.
+av_cold void ff_sine_window_init(float *window, int n) {
+    int i;
+    for(i = 0; i < n; i++)
+        window[i] = sinf((i + 0.5) * (M_PI / (2.0 * n)));
+}
+/**
+ * init MDCT or IMDCT computation.
+ */
+av_cold int ff_mdct_init(MDCTContext *s, int nbits, int inverse)
+{
+    int n, n4, i;
+    double alpha;
+    memset(s, 0, sizeof(*s));
+    n = 1 << nbits;
+    s->nbits = nbits;
+    s->n = n;
+    n4 = n >> 2;
+    s->tcos = av_malloc(n4 * sizeof(FFTSample));
+    if (!s->tcos)
+        goto fail;
+    s->tsin = av_malloc(n4 * sizeof(FFTSample));
+    if (!s->tsin)
+        goto fail;
+    for(i=0;i<n4;i++) {
+        alpha = 2 * M_PI * (i + 1.0 / 8.0) / n;
+        s->tcos[i] = -cos(alpha);
+        s->tsin[i] = -sin(alpha);
+    }
+    if (ff_fft_init(&s->fft, s->nbits - 2, inverse) < 0)
+        goto fail;
+    return 0;
+ fail:
+    av_freep(&s->tcos);
+    av_freep(&s->tsin);
+    return -1;
+}
+/* complex multiplication: p = a * b */
+#define CMUL(pre, pim, are, aim, bre, bim) \
+{\
+    FFTSample _are = (are);\
+    FFTSample _aim = (aim);\
+    FFTSample _bre = (bre);\
+    FFTSample _bim = (bim);\
+    (pre) = _are * _bre - _aim * _bim;\
+    (pim) = _are * _bim + _aim * _bre;\
+}
+/**
+ * Compute the middle half of the inverse MDCT of size N = 2^nbits,
+ * thus excluding the parts that can be derived by symmetry
+ * @param output N/2 samples
+ * @param input N/2 samples
+ */
+void ff_imdct_half_c(MDCTContext *s, FFTSample *output, const FFTSample *input)
+{
+    int k, n8, n4, n2, n, j;
+    const uint16_t *revtab = s->fft.revtab;
+    const FFTSample *tcos = s->tcos;
+    const FFTSample *tsin = s->tsin;
+    const FFTSample *in1, *in2;
+    FFTComplex *z = (FFTComplex *)output;
+    n = 1 << s->nbits;
+    n2 = n >> 1;
+    n4 = n >> 2;
+    n8 = n >> 3;
+    /* pre rotation */
+    in1 = input;
+    in2 = input + n2 - 1;
+    for(k = 0; k < n4; k++) {
+        j=revtab[k];
+        CMUL(z[j].re, z[j].im, *in2, *in1, tcos[k], tsin[k]);
+        in1 += 2;
+        in2 -= 2;
+    }
+    ff_fft_calc(&s->fft, z);
+    /* post rotation + reordering */
+    output += n4;
+    for(k = 0; k < n8; k++) {
+        FFTSample r0, i0, r1, i1;
+        CMUL(r0, i1, z[n8-k-1].im, z[n8-k-1].re, tsin[n8-k-1], tcos[n8-k-1]);
+        CMUL(r1, i0, z[n8+k  ].im, z[n8+k  ].re, tsin[n8+k  ], tcos[n8+k  ]);
+        z[n8-k-1].re = r0;
+        z[n8-k-1].im = i0;
+        z[n8+k  ].re = r1;
+        z[n8+k  ].im = i1;
+    }
+}
+/**
+ * Compute inverse MDCT of size N = 2^nbits
+ * @param output N samples
+ * @param input N/2 samples
+ */
+void ff_imdct_calc_c(MDCTContext *s, FFTSample *output, const FFTSample *input)
+{
+    int k;
+    int n = 1 << s->nbits;
+    int n2 = n >> 1;
+    int n4 = n >> 2;
+    ff_imdct_half_c(s, output+n4, input);
+    for(k = 0; k < n4; k++) {
+        output[k] = -output[n2-k-1];
+        output[n-k-1] = output[n2+k];
+    }
+}
+/**
+ * Compute MDCT of size N = 2^nbits
+ * @param input N samples
+ * @param out N/2 samples
+ */
+void ff_mdct_calc(MDCTContext *s, FFTSample *out, const FFTSample *input)
+{
+    int i, j, n, n8, n4, n2, n3;
+    FFTSample re, im;
+    const uint16_t *revtab = s->fft.revtab;
+    const FFTSample *tcos = s->tcos;
+    const FFTSample *tsin = s->tsin;
+    FFTComplex *x = (FFTComplex *)out;
+    n = 1 << s->nbits;
+    n2 = n >> 1;
+    n4 = n >> 2;
+    n8 = n >> 3;
+    n3 = 3 * n4;
+    /* pre rotation */
+    for(i=0;i<n8;i++) {
+        re = -input[2*i+3*n4] - input[n3-1-2*i];
+        im = -input[n4+2*i] + input[n4-1-2*i];
+        j = revtab[i];
+        CMUL(x[j].re, x[j].im, re, im, -tcos[i], tsin[i]);
+        re = input[2*i] - input[n2-1-2*i];
+        im = -(input[n2+2*i] + input[n-1-2*i]);
+        j = revtab[n8 + i];
+        CMUL(x[j].re, x[j].im, re, im, -tcos[n8 + i], tsin[n8 + i]);
+    }
+    ff_fft_calc(&s->fft, x);
+    /* post rotation */
+    for(i=0;i<n8;i++) {
+        FFTSample r0, i0, r1, i1;
+        CMUL(i1, r0, x[n8-i-1].re, x[n8-i-1].im, -tsin[n8-i-1], -tcos[n8-i-1]);
+        CMUL(i0, r1, x[n8+i  ].re, x[n8+i  ].im, -tsin[n8+i  ], -tcos[n8+i  ]);
+        x[n8-i-1].re = r0;
+        x[n8-i-1].im = i0;
+        x[n8+i  ].re = r1;
+        x[n8+i  ].im = i1;
+    }
+}
+av_cold void ff_mdct_end(MDCTContext *s)
+{
+    av_freep(&s->tcos);
+    av_freep(&s->tsin);
+    ff_fft_end(&s->fft);
+}
diff --git a/apps/codecs/librm/rm.c b/apps/codecs/librm/rm.c
index 00ee37a2f2..7053e7d58c 100644
--- a/apps/codecs/librm/rm.c
+++ b/apps/codecs/librm/rm.c
@@ -29,7 +29,7 @@
 #define SWAP(a, b) do{uint8_t SWAP_tmp= b; b= a; a= SWAP_tmp;}while(0)
-static void advance_buffer(uint8_t **buf, int val)
+void advance_buffer(uint8_t **buf, int val)
 {
    *buf += val;
 }
@@ -237,10 +237,40 @@ static int real_read_audio_stream_info(int fd, RMContext *rmctx)
           skipped += 1;
       }
  
-       read_uint32be(fd, &rmctx->extradata_size);
+       switch(fourcc) {
-       skipped += 4;
+           case FOURCC('c','o','o','k'):               
-       read(fd, rmctx->codec_extradata, rmctx->extradata_size);
+               rmctx->codec_type = CODEC_COOK;
-       skipped += rmctx->extradata_size;
+               read_uint32be(fd, &rmctx->extradata_size);
+               skipped += 4;
+               read(fd, rmctx->codec_extradata, rmctx->extradata_size);
+               skipped += rmctx->extradata_size;
+               break;
+           case FOURCC('a','t','r','c'):  
+DEBUGF("WERE HERE\n");             
+               rmctx->codec_type = CODEC_ATRAC;
+               read_uint32be(fd, &rmctx->extradata_size);
+               skipped += 4;
+               read(fd, rmctx->codec_extradata, rmctx->extradata_size);
+               skipped += rmctx->extradata_size;
+               break;
+           case FOURCC('r','a','a','c'):
+           case FOURCC('r','a','c','p'):
+               rmctx->codec_type = CODEC_AAC;
+               read_uint32be(fd, &rmctx->extradata_size);
+               skipped += 4;
+               read(fd, rmctx->codec_extradata, rmctx->extradata_size);
+               skipped += rmctx->extradata_size;
+               break;
+           case FOURCC('d','n','e','t'):
+               rmctx->codec_type = CODEC_AC3;
+               break;
+           default: /* Not a supported codec */
+               return -1;
+       }
       
       
       DEBUGF("        flavor = %d\n",flavor);
@@ -252,8 +282,10 @@ static int real_read_audio_stream_info(int fd, RMContext *rmctx)
       DEBUGF("        channels= %d\n",rmctx->nb_channels);
       DEBUGF("        fourcc = %s\n",fourcc2str(fourcc));
       DEBUGF("        codec_extra_data_length = %d\n",rmctx->extradata_size);
-       DEBUGF("        codec_extradata :\n");
+       if(rmctx->codec_type == CODEC_COOK) {
-       print_cook_extradata(rmctx);
+           DEBUGF("        cook_extradata :\n");
+           print_cook_extradata(rmctx);
+       }
    }
@@ -530,7 +562,7 @@ int rm_get_packet(uint8_t **src,RMContext *rmctx, RMPacket *pkt)
        
        advance_buffer(src,12);
        consumed += 12;
-        if (rmctx->codec_type == CODEC_COOK) {
+        if (rmctx->codec_type == CODEC_COOK || rmctx->codec_type == CODEC_ATRAC) {
            for(x = 0 ; x < w/sps; x++)
            {
                place = sps*(h*x+((h+1)/2)*(y&1)+(y>>1)); 
diff --git a/apps/codecs/librm/rm.h b/apps/codecs/librm/rm.h
index 86fe5e7f1a..be88701989 100644
--- a/apps/codecs/librm/rm.h
+++ b/apps/codecs/librm/rm.h
@@ -31,7 +31,8 @@
 enum codecs {
    CODEC_COOK, 
    CODEC_AAC,
-    CODEC_AC3
+    CODEC_AC3,
+    CODEC_ATRAC
 };
 typedef struct rm_packet
author	Mohamed Tarek <mt@rockbox.org>	2009-08-10 14:46:31 +0000
committer	Mohamed Tarek <mt@rockbox.org>	2009-08-10 14:46:31 +0000
commit	519adfbaae5933ec18d2e3cb77e690e36cd49916 (patch)
tree	8303456ddf72f2ff5da2b9a99802c73e1409fc28 /apps/codecs
parent	1c0aeb18ca264e1f64e31f219189afab442676eb (diff)
download	rockbox-519adfbaae5933ec18d2e3cb77e690e36cd49916.tar.gz rockbox-519adfbaae5933ec18d2e3cb77e690e36cd49916.zip