Add libwmapro to trunk.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@25739 a1c6a512-1295-4272-9138-f99709370657
author: Mohamed Tarek <mt@rockbox.org> 2010-04-27 18:04:34 +0000
committer: Mohamed Tarek <mt@rockbox.org> 2010-04-27 18:04:34 +0000
commit: dda7fab1d65e73a6bdbdac1b1d37330b8f0085aa (patch)
tree: 6446710d45643347d1f301bbfcb914e3ec4a39e4
parent: f96406c5f3aca04f3c0ab95275d857288fd8c2ea (diff)
download: rockbox-dda7fab1d65e73a6bdbdac1b1d37330b8f0085aa.tar.gz
rockbox-dda7fab1d65e73a6bdbdac1b1d37330b8f0085aa.zip
30 files changed, 16068 insertions, 0 deletions
diff --git a/apps/codecs/libwmapro/Makefile b/apps/codecs/libwmapro/Makefile
new file mode 100644
index 0000000000..1fdd82090c
--- /dev/null
+++ b/apps/codecs/libwmapro/Makefile
@@ -0,0 +1,11 @@
+# Builds the $(TARGET) binary from the libwmapro sources in a single
+# compile-and-link step.
+# C dialect passed to the compiler through -std=.
+STD = c99
+# Libraries placed after the sources on the command line (libm).
+LINK = -lm
+CFLAGS = -Wall -std=$(STD)
+TARGET = test
+# NOTE: despite the name, OBJS lists C source files, not object files.
+OBJS = wmaprodec.c wma.c dsputil.c mdct.c fft.c bitstream.c libavutil/log.c libavutil/mem.c libavutil/mathematics.c
+# 'clean' names an action, not a file. Declaring it phony keeps a stray file
+# called "clean" from making the rule look up to date.
+.PHONY: clean
+# NOTE(review): make requires recipe lines to start with a TAB; confirm the
+# indentation below has not been converted to spaces.
+$(TARGET): $(OBJS)
+        gcc $(CFLAGS) $(OBJS) $(LINK) -o $(TARGET)
+
+clean:
+        rm -f $(TARGET) *~
diff --git a/apps/codecs/libwmapro/avcodec.h b/apps/codecs/libwmapro/avcodec.h
new file mode 100644
index 0000000000..691dba725f
--- /dev/null
+++ b/apps/codecs/libwmapro/avcodec.h
@@ -0,0 +1,3481 @@
+/*
+ * copyright (c) 2001 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef AVCODEC_AVCODEC_H
+#define AVCODEC_AVCODEC_H
+/**
+ * @file libavcodec/avcodec.h
+ * external API header
+ */
+#include <errno.h>
+#include "libavutil/avutil.h"
+/* libavcodec version components; the macros below are derived from them. */
+#define LIBAVCODEC_VERSION_MAJOR 52
+#define LIBAVCODEC_VERSION_MINOR 22
+#define LIBAVCODEC_VERSION_MICRO  0
+/* Version combined by AV_VERSION_INT (not defined in this part of the header). */
+#define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
+                                               LIBAVCODEC_VERSION_MINOR, \
+                                               LIBAVCODEC_VERSION_MICRO)
+/* Version combined by AV_VERSION (not defined in this part of the header). */
+#define LIBAVCODEC_VERSION      AV_VERSION(LIBAVCODEC_VERSION_MAJOR,    \
+                                           LIBAVCODEC_VERSION_MINOR,    \
+                                           LIBAVCODEC_VERSION_MICRO)
+/* Alias of LIBAVCODEC_VERSION_INT. */
+#define LIBAVCODEC_BUILD        LIBAVCODEC_VERSION_INT
+/* "Lavc" concatenated with the stringified LIBAVCODEC_VERSION. */
+#define LIBAVCODEC_IDENT        "Lavc" AV_STRINGIFY(LIBAVCODEC_VERSION)
+/*
+ * Reserved timestamp value with bit pattern 0x8000000000000000.
+ * That constant does not fit in int64_t, so wrapping it in INT64_C() leaves
+ * the literal with an unsigned type and makes signed ordering comparisons
+ * against it misbehave. Build it as an unsigned 64-bit constant and convert
+ * explicitly, so the macro really has type int64_t (the most negative value
+ * on two's-complement targets).
+ */
+#define AV_NOPTS_VALUE          ((int64_t)UINT64_C(0x8000000000000000))
+/* NOTE(review): presumably the number of internal time units per second - confirm. */
+#define AV_TIME_BASE            1000000
+/* AVRational compound literal initialised with {1, AV_TIME_BASE}. */
+#define AV_TIME_BASE_Q          (AVRational){1, AV_TIME_BASE}
+/**
+ * Identifies the syntax and semantics of the bitstream.
+ * The principle is roughly:
+ * Two decoders with the same ID can decode the same streams.
+ * Two encoders with the same ID can encode compatible streams.
+ * There may be slight deviations from the principle due to implementation
+ * details.
+ *
+ * If you add a codec ID to this list, add it so that
+ * 1. no value of an existing codec ID changes (that would break ABI),
+ * 2. it is as close as possible to similar codecs.
+ */
+enum CodecID {
+    CODEC_ID_NONE,
+    /* video codecs */
+    CODEC_ID_MPEG1VIDEO,
+    CODEC_ID_MPEG2VIDEO, ///< preferred ID for MPEG-1/2 video decoding
+    CODEC_ID_MPEG2VIDEO_XVMC,
+    CODEC_ID_H261,
+    CODEC_ID_H263,
+    CODEC_ID_RV10,
+    CODEC_ID_RV20,
+    CODEC_ID_MJPEG,
+    CODEC_ID_MJPEGB,
+    CODEC_ID_LJPEG,
+    CODEC_ID_SP5X,
+    CODEC_ID_JPEGLS,
+    CODEC_ID_MPEG4,
+    CODEC_ID_RAWVIDEO,
+    CODEC_ID_MSMPEG4V1,
+    CODEC_ID_MSMPEG4V2,
+    CODEC_ID_MSMPEG4V3,
+    CODEC_ID_WMV1,
+    CODEC_ID_WMV2,
+    CODEC_ID_H263P,
+    CODEC_ID_H263I,
+    CODEC_ID_FLV1,
+    CODEC_ID_SVQ1,
+    CODEC_ID_SVQ3,
+    CODEC_ID_DVVIDEO,
+    CODEC_ID_HUFFYUV,
+    CODEC_ID_CYUV,
+    CODEC_ID_H264,
+    CODEC_ID_INDEO3,
+    CODEC_ID_VP3,
+    CODEC_ID_THEORA,
+    CODEC_ID_ASV1,
+    CODEC_ID_ASV2,
+    CODEC_ID_FFV1,
+    CODEC_ID_4XM,
+    CODEC_ID_VCR1,
+    CODEC_ID_CLJR,
+    CODEC_ID_MDEC,
+    CODEC_ID_ROQ,
+    CODEC_ID_INTERPLAY_VIDEO,
+    CODEC_ID_XAN_WC3,
+    CODEC_ID_XAN_WC4,
+    CODEC_ID_RPZA,
+    CODEC_ID_CINEPAK,
+    CODEC_ID_WS_VQA,
+    CODEC_ID_MSRLE,
+    CODEC_ID_MSVIDEO1,
+    CODEC_ID_IDCIN,
+    CODEC_ID_8BPS,
+    CODEC_ID_SMC,
+    CODEC_ID_FLIC,
+    CODEC_ID_TRUEMOTION1,
+    CODEC_ID_VMDVIDEO,
+    CODEC_ID_MSZH,
+    CODEC_ID_ZLIB,
+    CODEC_ID_QTRLE,
+    CODEC_ID_SNOW,
+    CODEC_ID_TSCC,
+    CODEC_ID_ULTI,
+    CODEC_ID_QDRAW,
+    CODEC_ID_VIXL,
+    CODEC_ID_QPEG,
+    CODEC_ID_XVID,
+    CODEC_ID_PNG,
+    CODEC_ID_PPM,
+    CODEC_ID_PBM,
+    CODEC_ID_PGM,
+    CODEC_ID_PGMYUV,
+    CODEC_ID_PAM,
+    CODEC_ID_FFVHUFF,
+    CODEC_ID_RV30,
+    CODEC_ID_RV40,
+    CODEC_ID_VC1,
+    CODEC_ID_WMV3,
+    CODEC_ID_LOCO,
+    CODEC_ID_WNV1,
+    CODEC_ID_AASC,
+    CODEC_ID_INDEO2,
+    CODEC_ID_FRAPS,
+    CODEC_ID_TRUEMOTION2,
+    CODEC_ID_BMP,
+    CODEC_ID_CSCD,
+    CODEC_ID_MMVIDEO,
+    CODEC_ID_ZMBV,
+    CODEC_ID_AVS,
+    CODEC_ID_SMACKVIDEO,
+    CODEC_ID_NUV,
+    CODEC_ID_KMVC,
+    CODEC_ID_FLASHSV,
+    CODEC_ID_CAVS,
+    CODEC_ID_JPEG2000,
+    CODEC_ID_VMNC,
+    CODEC_ID_VP5,
+    CODEC_ID_VP6,
+    CODEC_ID_VP6F,
+    CODEC_ID_TARGA,
+    CODEC_ID_DSICINVIDEO,
+    CODEC_ID_TIERTEXSEQVIDEO,
+    CODEC_ID_TIFF,
+    CODEC_ID_GIF,
+    CODEC_ID_FFH264,
+    CODEC_ID_DXA,
+    CODEC_ID_DNXHD,
+    CODEC_ID_THP,
+    CODEC_ID_SGI,
+    CODEC_ID_C93,
+    CODEC_ID_BETHSOFTVID,
+    CODEC_ID_PTX,
+    CODEC_ID_TXD,
+    CODEC_ID_VP6A,
+    CODEC_ID_AMV,
+    CODEC_ID_VB,
+    CODEC_ID_PCX,
+    CODEC_ID_SUNRAST,
+    CODEC_ID_INDEO4,
+    CODEC_ID_INDEO5,
+    CODEC_ID_MIMIC,
+    CODEC_ID_RL2,
+    CODEC_ID_8SVX_EXP,
+    CODEC_ID_8SVX_FIB,
+    CODEC_ID_ESCAPE124,
+    CODEC_ID_DIRAC,
+    CODEC_ID_BFI,
+    CODEC_ID_CMV,
+    CODEC_ID_MOTIONPIXELS,
+    CODEC_ID_TGV,
+    CODEC_ID_TGQ,
+    CODEC_ID_TQI,
+    /* various PCM "codecs" */
+    CODEC_ID_PCM_S16LE= 0x10000,
+    CODEC_ID_PCM_S16BE,
+    CODEC_ID_PCM_U16LE,
+    CODEC_ID_PCM_U16BE,
+    CODEC_ID_PCM_S8,
+    CODEC_ID_PCM_U8,
+    CODEC_ID_PCM_MULAW,
+    CODEC_ID_PCM_ALAW,
+    CODEC_ID_PCM_S32LE,
+    CODEC_ID_PCM_S32BE,
+    CODEC_ID_PCM_U32LE,
+    CODEC_ID_PCM_U32BE,
+    CODEC_ID_PCM_S24LE,
+    CODEC_ID_PCM_S24BE,
+    CODEC_ID_PCM_U24LE,
+    CODEC_ID_PCM_U24BE,
+    CODEC_ID_PCM_S24DAUD,
+    CODEC_ID_PCM_ZORK,
+    CODEC_ID_PCM_S16LE_PLANAR,
+    CODEC_ID_PCM_DVD,
+    CODEC_ID_PCM_F32BE,
+    CODEC_ID_PCM_F32LE,
+    CODEC_ID_PCM_F64BE,
+    CODEC_ID_PCM_F64LE,
+    /* various ADPCM codecs */
+    CODEC_ID_ADPCM_IMA_QT= 0x11000,
+    CODEC_ID_ADPCM_IMA_WAV,
+    CODEC_ID_ADPCM_IMA_DK3,
+    CODEC_ID_ADPCM_IMA_DK4,
+    CODEC_ID_ADPCM_IMA_WS,
+    CODEC_ID_ADPCM_IMA_SMJPEG,
+    CODEC_ID_ADPCM_MS,
+    CODEC_ID_ADPCM_4XM,
+    CODEC_ID_ADPCM_XA,
+    CODEC_ID_ADPCM_ADX,
+    CODEC_ID_ADPCM_EA,
+    CODEC_ID_ADPCM_G726,
+    CODEC_ID_ADPCM_CT,
+    CODEC_ID_ADPCM_SWF,
+    CODEC_ID_ADPCM_YAMAHA,
+    CODEC_ID_ADPCM_SBPRO_4,
+    CODEC_ID_ADPCM_SBPRO_3,
+    CODEC_ID_ADPCM_SBPRO_2,
+    CODEC_ID_ADPCM_THP,
+    CODEC_ID_ADPCM_IMA_AMV,
+    CODEC_ID_ADPCM_EA_R1,
+    CODEC_ID_ADPCM_EA_R3,
+    CODEC_ID_ADPCM_EA_R2,
+    CODEC_ID_ADPCM_IMA_EA_SEAD,
+    CODEC_ID_ADPCM_IMA_EA_EACS,
+    CODEC_ID_ADPCM_EA_XAS,
+    CODEC_ID_ADPCM_EA_MAXIS_XA,
+    CODEC_ID_ADPCM_IMA_ISS,
+    /* AMR */
+    CODEC_ID_AMR_NB= 0x12000,
+    CODEC_ID_AMR_WB,
+    /* RealAudio codecs*/
+    CODEC_ID_RA_144= 0x13000,
+    CODEC_ID_RA_288,
+    /* various DPCM codecs */
+    CODEC_ID_ROQ_DPCM= 0x14000,
+    CODEC_ID_INTERPLAY_DPCM,
+    CODEC_ID_XAN_DPCM,
+    CODEC_ID_SOL_DPCM,
+    /* audio codecs */
+    CODEC_ID_MP2= 0x15000,
+    CODEC_ID_MP3, ///< preferred ID for decoding MPEG audio layer 1, 2 or 3
+    CODEC_ID_AAC,
+    CODEC_ID_AC3,
+    CODEC_ID_DTS,
+    CODEC_ID_VORBIS,
+    CODEC_ID_DVAUDIO,
+    CODEC_ID_WMAV1,
+    CODEC_ID_WMAV2,
+    CODEC_ID_MACE3,
+    CODEC_ID_MACE6,
+    CODEC_ID_VMDAUDIO,
+    CODEC_ID_SONIC,
+    CODEC_ID_SONIC_LS,
+    CODEC_ID_FLAC,
+    CODEC_ID_MP3ADU,
+    CODEC_ID_MP3ON4,
+    CODEC_ID_SHORTEN,
+    CODEC_ID_ALAC,
+    CODEC_ID_WESTWOOD_SND1,
+    CODEC_ID_GSM, ///< as in Berlin toast format
+    CODEC_ID_QDM2,
+    CODEC_ID_COOK,
+    CODEC_ID_TRUESPEECH,
+    CODEC_ID_TTA,
+    CODEC_ID_SMACKAUDIO,
+    CODEC_ID_QCELP,
+    CODEC_ID_WAVPACK,
+    CODEC_ID_DSICINAUDIO,
+    CODEC_ID_IMC,
+    CODEC_ID_MUSEPACK7,
+    CODEC_ID_MLP,
+    CODEC_ID_GSM_MS, /* as found in WAV */
+    CODEC_ID_ATRAC3,
+    CODEC_ID_VOXWARE,
+    CODEC_ID_APE,
+    CODEC_ID_NELLYMOSER,
+    CODEC_ID_MUSEPACK8,
+    CODEC_ID_SPEEX,
+    CODEC_ID_WMAVOICE,
+    CODEC_ID_WMAPRO,
+    CODEC_ID_WMALOSSLESS,
+    CODEC_ID_ATRAC3P,
+    CODEC_ID_EAC3,
+    CODEC_ID_SIPR,
+    CODEC_ID_MP1,
+    CODEC_ID_TWINVQ,
+    CODEC_ID_TRUEHD,
+    /* subtitle codecs */
+    CODEC_ID_DVD_SUBTITLE= 0x17000,
+    CODEC_ID_DVB_SUBTITLE,
+    CODEC_ID_TEXT,  ///< raw UTF-8 text
+    CODEC_ID_XSUB,
+    CODEC_ID_SSA,
+    CODEC_ID_MOV_TEXT,
+    /* other specific kind of codecs (generally used for attachments) */
+    CODEC_ID_TTF= 0x18000,
+    CODEC_ID_PROBE= 0x19000, ///< codec_id is not known (like CODEC_ID_NONE) but lavf should attempt to identify it
+    CODEC_ID_MPEG2TS= 0x20000, /**< _FAKE_ codec to indicate a raw MPEG-2 TS
+                                * stream (only used by libavformat) */
+};
+/**
+ * Media type constants (video, audio, data, subtitle, attachment).
+ * CODEC_TYPE_UNKNOWN is explicitly -1, so the real types are numbered from 0
+ * and CODEC_TYPE_NB, being declared last, equals the number of real types.
+ */
+enum CodecType {
+    CODEC_TYPE_UNKNOWN = -1,
+    CODEC_TYPE_VIDEO,
+    CODEC_TYPE_AUDIO,
+    CODEC_TYPE_DATA,
+    CODEC_TYPE_SUBTITLE,
+    CODEC_TYPE_ATTACHMENT,
+    CODEC_TYPE_NB
+};
+/**
+ * Sample formats; all in native-endian format.
+ * SAMPLE_FMT_NONE is explicitly -1, so the real formats are numbered from 0
+ * and SAMPLE_FMT_NB, being declared last, equals the number of real formats.
+ */
+enum SampleFormat {
+    SAMPLE_FMT_NONE = -1,
+    SAMPLE_FMT_U8,              ///< unsigned 8 bits
+    SAMPLE_FMT_S16,             ///< signed 16 bits
+    SAMPLE_FMT_S32,             ///< signed 32 bits
+    SAMPLE_FMT_FLT,             ///< float
+    SAMPLE_FMT_DBL,             ///< double
+    SAMPLE_FMT_NB               ///< Number of sample formats. DO NOT USE if dynamically linking to libavcodec
+};
+/* Audio channel masks: each CH_* value below is a single, distinct bit, so
+ * masks can be OR-ed together to describe a set of channels. */
+#define CH_FRONT_LEFT             0x00000001
+#define CH_FRONT_RIGHT            0x00000002
+#define CH_FRONT_CENTER           0x00000004
+#define CH_LOW_FREQUENCY          0x00000008
+#define CH_BACK_LEFT              0x00000010
+#define CH_BACK_RIGHT             0x00000020
+#define CH_FRONT_LEFT_OF_CENTER   0x00000040
+#define CH_FRONT_RIGHT_OF_CENTER  0x00000080
+#define CH_BACK_CENTER            0x00000100
+#define CH_SIDE_LEFT              0x00000200
+#define CH_SIDE_RIGHT             0x00000400
+#define CH_TOP_CENTER             0x00000800
+#define CH_TOP_FRONT_LEFT         0x00001000
+#define CH_TOP_FRONT_CENTER       0x00002000
+#define CH_TOP_FRONT_RIGHT        0x00004000
+#define CH_TOP_BACK_LEFT          0x00008000
+#define CH_TOP_BACK_CENTER        0x00010000
+#define CH_TOP_BACK_RIGHT         0x00020000
+#define CH_STEREO_LEFT            0x20000000  ///< Stereo downmix.
+#define CH_STEREO_RIGHT           0x40000000  ///< See CH_STEREO_LEFT.
+/* Audio channel convenience macros: common layouts expressed as the OR of
+ * the single-channel masks above. Note that 5.0/5.1 use the SIDE pair, and
+ * 7.1 adds the BACK pair on top of 5.1. */
+#define CH_LAYOUT_MONO              (CH_FRONT_CENTER)
+#define CH_LAYOUT_STEREO            (CH_FRONT_LEFT|CH_FRONT_RIGHT)
+#define CH_LAYOUT_SURROUND          (CH_LAYOUT_STEREO|CH_FRONT_CENTER)
+#define CH_LAYOUT_QUAD              (CH_LAYOUT_STEREO|CH_BACK_LEFT|CH_BACK_RIGHT)
+#define CH_LAYOUT_5POINT0           (CH_LAYOUT_SURROUND|CH_SIDE_LEFT|CH_SIDE_RIGHT)
+#define CH_LAYOUT_5POINT1           (CH_LAYOUT_5POINT0|CH_LOW_FREQUENCY)
+#define CH_LAYOUT_7POINT1           (CH_LAYOUT_5POINT1|CH_BACK_LEFT|CH_BACK_RIGHT)
+/* 7.1 "wide": uses the front left/right-of-center pair instead of the side pair. */
+#define CH_LAYOUT_7POINT1_WIDE      (CH_LAYOUT_SURROUND|CH_LOW_FREQUENCY|\
+                                          CH_BACK_LEFT|CH_BACK_RIGHT|\
+                                          CH_FRONT_LEFT_OF_CENTER|CH_FRONT_RIGHT_OF_CENTER)
+#define CH_LAYOUT_STEREO_DOWNMIX    (CH_STEREO_LEFT|CH_STEREO_RIGHT)
+/* in bytes */
+#define AVCODEC_MAX_AUDIO_FRAME_SIZE 192000 // 1 second of 48khz 32bit audio
+/**
+ * Required number of additionally allocated bytes at the end of the input bitstream for decoding.
+ * This is mainly needed because some optimized bitstream readers read
+ * 32 or 64 bit at once and could read over the end.<br>
+ * Note: If the first 23 bits of the additional bytes are not 0, then damaged
+ * MPEG bitstreams could cause overread and segfault.
+ */
+#define FF_INPUT_BUFFER_PADDING_SIZE 8
+/**
+ * minimum encoding buffer size
+ * Used to avoid some checks during header writing.
+ */
+#define FF_MIN_BUFFER_SIZE 16384
+#if 0/* MT : DELETE THIS LINE.*/
+/**
+ * motion estimation type.
+ * Numbering starts at 1 (ME_ZERO) and increases by one in declaration order,
+ * so ME_TESA is 10.
+ */
+enum Motion_Est_ID {
+    ME_ZERO = 1,    ///< no search, that is use 0,0 vector whenever one is needed
+    ME_FULL,        ///< = 2
+    ME_LOG,         ///< = 3
+    ME_PHODS,       ///< = 4
+    ME_EPZS,        ///< enhanced predictive zonal search
+    ME_X1,          ///< reserved for experiments
+    ME_HEX,         ///< hexagon based search
+    ME_UMH,         ///< uneven multi-hexagon search
+    ME_ITER,        ///< iterative search
+    ME_TESA,        ///< transformed exhaustive search algorithm
+};
+/**
+ * Discard levels, declared from "discard nothing" (AVDISCARD_NONE, -16) up to
+ * "discard all" (AVDISCARD_ALL, 48). AVDISCARD_DEFAULT is 0.
+ */
+enum AVDiscard{
+    /* We leave some space between them for extensions (drop some
+     * keyframes for intra-only or drop just some bidir frames). */
+    AVDISCARD_NONE   =-16, ///< discard nothing
+    AVDISCARD_DEFAULT=  0, ///< discard useless packets like 0 size packets in avi
+    AVDISCARD_NONREF =  8, ///< discard all non reference
+    AVDISCARD_BIDIR  = 16, ///< discard all bidirectional frames
+    AVDISCARD_NONKEY = 32, ///< discard all frames except keyframes
+    AVDISCARD_ALL    = 48, ///< discard all
+};
+/**
+ * Override entry carrying either a fixed qscale or a quality factor for a
+ * span of frames.
+ * NOTE(review): presumably consumed by rate control for frames in
+ * [start_frame, end_frame]; confirm the user and whether the bounds are inclusive.
+ */
+typedef struct RcOverride{
+    int start_frame;      // first frame of the span
+    int end_frame;        // last frame of the span
+    int qscale; // If this is 0 then quality_factor will be used instead.
+    float quality_factor; // used only when qscale is 0 (see above)
+} RcOverride;
+#define FF_MAX_B_FRAMES 16
+/* encoding support
+   These flags can be passed in AVCodecContext.flags before initialization.
+   Note: Not everything is supported yet.
+*/
+#define CODEC_FLAG_QSCALE 0x0002  ///< Use fixed qscale.
+#define CODEC_FLAG_4MV    0x0004  ///< 4 MV per MB allowed / advanced prediction for H.263.
+#define CODEC_FLAG_QPEL   0x0010  ///< Use qpel MC.
+#define CODEC_FLAG_GMC    0x0020  ///< Use GMC.
+#define CODEC_FLAG_MV0    0x0040  ///< Always try a MB with MV=<0,0>.
+#define CODEC_FLAG_PART   0x0080  ///< Use data partitioning.
+/**
+ * The parent program guarantees that the input for B-frames containing
+ * streams is not written to for at least s->max_b_frames+1 frames, if
+ * this is not set the input will be copied.
+ */
+#define CODEC_FLAG_INPUT_PRESERVED 0x0100
+#define CODEC_FLAG_PASS1           0x0200   ///< Use internal 2pass ratecontrol in first pass mode.
+#define CODEC_FLAG_PASS2           0x0400   ///< Use internal 2pass ratecontrol in second pass mode.
+#define CODEC_FLAG_EXTERN_HUFF     0x1000   ///< Use external Huffman table (for MJPEG).
+#define CODEC_FLAG_GRAY            0x2000   ///< Only decode/encode grayscale.
+#define CODEC_FLAG_EMU_EDGE        0x4000   ///< Don't draw edges.
+#define CODEC_FLAG_PSNR            0x8000   ///< error[?] variables will be set during encoding.
+#define CODEC_FLAG_TRUNCATED       0x00010000 /** Input bitstream might be truncated at a random
+                                                  location instead of only at frame boundaries. */
+#define CODEC_FLAG_NORMALIZE_AQP  0x00020000 ///< Normalize adaptive quantization.
+#define CODEC_FLAG_INTERLACED_DCT 0x00040000 ///< Use interlaced DCT.
+#define CODEC_FLAG_LOW_DELAY      0x00080000 ///< Force low delay.
+#define CODEC_FLAG_ALT_SCAN       0x00100000 ///< Use alternate scan.
+#define CODEC_FLAG_GLOBAL_HEADER  0x00400000 ///< Place global headers in extradata instead of every keyframe.
+#define CODEC_FLAG_BITEXACT       0x00800000 ///< Use only bitexact stuff (except (I)DCT).
+/* Fx : Flag for h263+ extra options */
+#define CODEC_FLAG_AC_PRED        0x01000000 ///< H.263 advanced intra coding / MPEG-4 AC prediction
+#define CODEC_FLAG_H263P_UMV      0x02000000 ///< unlimited motion vector
+#define CODEC_FLAG_CBP_RD         0x04000000 ///< Use rate distortion optimization for cbp.
+#define CODEC_FLAG_QP_RD          0x08000000 ///< Use rate distortion optimization for qp selection.
+#define CODEC_FLAG_H263P_AIV      0x00000008 ///< H.263 alternative inter VLC
+#define CODEC_FLAG_OBMC           0x00000001 ///< OBMC
+#define CODEC_FLAG_LOOP_FILTER    0x00000800 ///< loop filter
+#define CODEC_FLAG_H263P_SLICE_STRUCT 0x10000000
+#define CODEC_FLAG_INTERLACED_ME  0x20000000 ///< interlaced motion estimation
+#define CODEC_FLAG_SVCD_SCAN_OFFSET 0x40000000 ///< Will reserve space for SVCD scan offset user data.
+#define CODEC_FLAG_CLOSED_GOP     0x80000000
+#define CODEC_FLAG2_FAST          0x00000001 ///< Allow non spec compliant speedup tricks.
+#define CODEC_FLAG2_STRICT_GOP    0x00000002 ///< Strictly enforce GOP size.
+#define CODEC_FLAG2_NO_OUTPUT     0x00000004 ///< Skip bitstream encoding.
+#define CODEC_FLAG2_LOCAL_HEADER  0x00000008 ///< Place global headers at every keyframe instead of in extradata.
+#define CODEC_FLAG2_BPYRAMID      0x00000010 ///< H.264 allow B-frames to be used as references.
+#define CODEC_FLAG2_WPRED         0x00000020 ///< H.264 weighted biprediction for B-frames
+#define CODEC_FLAG2_MIXED_REFS    0x00000040 ///< H.264 one reference per partition, as opposed to one reference per macroblock
+#define CODEC_FLAG2_8X8DCT        0x00000080 ///< H.264 high profile 8x8 transform
+#define CODEC_FLAG2_FASTPSKIP     0x00000100 ///< H.264 fast pskip
+#define CODEC_FLAG2_AUD           0x00000200 ///< H.264 access unit delimiters
+#define CODEC_FLAG2_BRDO          0x00000400 ///< B-frame rate-distortion optimization
+#define CODEC_FLAG2_INTRA_VLC     0x00000800 ///< Use MPEG-2 intra VLC table.
+#define CODEC_FLAG2_MEMC_ONLY     0x00001000 ///< Only do ME/MC (I frames -> ref, P frame -> ME+MC).
+#define CODEC_FLAG2_DROP_FRAME_TIMECODE 0x00002000 ///< timecode is in drop frame format.
+#define CODEC_FLAG2_SKIP_RD       0x00004000 ///< RD optimal MB level residual skipping
+#define CODEC_FLAG2_CHUNKS        0x00008000 ///< Input bitstream might be truncated at packet boundaries instead of only at frame boundaries.
+#define CODEC_FLAG2_NON_LINEAR_QUANT 0x00010000 ///< Use MPEG-2 nonlinear quantizer.
+#define CODEC_FLAG2_BIT_RESERVOIR 0x00020000 ///< Use a bit reservoir when encoding if possible
+/* Unsupported options :
+ *              Syntax Arithmetic coding (SAC)
+ *              Reference Picture Selection
+ *              Independent Segment Decoding */
+/* /Fx */
+/* codec capabilities */
+#define CODEC_CAP_DRAW_HORIZ_BAND 0x0001 ///< Decoder can use draw_horiz_band callback.
+/**
+ * Codec uses get_buffer() for allocating buffers.
+ * direct rendering method 1
+ */
+#define CODEC_CAP_DR1             0x0002
+/* If 'parse_only' field is true, then avcodec_parse_frame() can be used. */
+#define CODEC_CAP_PARSE_ONLY      0x0004
+#define CODEC_CAP_TRUNCATED       0x0008
+/* Codec can export data for HW decoding (XvMC). */
+#define CODEC_CAP_HWACCEL         0x0010
+/**
+ * Codec has a nonzero delay and needs to be fed with NULL at the end to get the delayed data.
+ * If this is not set, the codec is guaranteed to never be fed with NULL data.
+ */
+#define CODEC_CAP_DELAY           0x0020
+/**
+ * Codec can be fed a final frame with a smaller size.
+ * This can be used to prevent truncation of the last audio samples.
+ */
+#define CODEC_CAP_SMALL_LAST_FRAME 0x0040
+/**
+ * Codec can export data for HW decoding (VDPAU).
+ */
+#define CODEC_CAP_HWACCEL_VDPAU    0x0080
+//The following defines may change, don't expect compatibility if you use them.
+#define MB_TYPE_INTRA4x4   0x0001
+#define MB_TYPE_INTRA16x16 0x0002 //FIXME H.264-specific
+#define MB_TYPE_INTRA_PCM  0x0004 //FIXME H.264-specific
+#define MB_TYPE_16x16      0x0008
+#define MB_TYPE_16x8       0x0010
+#define MB_TYPE_8x16       0x0020
+#define MB_TYPE_8x8        0x0040
+#define MB_TYPE_INTERLACED 0x0080
+#define MB_TYPE_DIRECT2    0x0100 //FIXME
+#define MB_TYPE_ACPRED     0x0200
+#define MB_TYPE_GMC        0x0400
+#define MB_TYPE_SKIP       0x0800
+#define MB_TYPE_P0L0       0x1000
+#define MB_TYPE_P1L0       0x2000
+#define MB_TYPE_P0L1       0x4000
+#define MB_TYPE_P1L1       0x8000
+#define MB_TYPE_L0         (MB_TYPE_P0L0 | MB_TYPE_P1L0)
+#define MB_TYPE_L1         (MB_TYPE_P0L1 | MB_TYPE_P1L1)
+#define MB_TYPE_L0L1       (MB_TYPE_L0   | MB_TYPE_L1)
+#define MB_TYPE_QUANT      0x00010000
+#define MB_TYPE_CBP        0x00020000
+//Note bits 24-31 are reserved for codec specific use (h264 ref0, mpeg1 0mv, ...)
+/**
+ * Pan Scan area.
+ * This specifies the area which should be displayed.
+ * Note there may be multiple such areas for one frame.
+ */
+typedef struct AVPanScan{
+    /**
+     * id
+     * - encoding: Set by user.
+     * - decoding: Set by libavcodec.
+     */
+    int id;
+    /**
+     * width and height in 1/16 pel
+     * - encoding: Set by user.
+     * - decoding: Set by libavcodec.
+     */
+    int width;
+    int height;
+    /**
+     * position of the top left corner in 1/16 pel for up to 3 fields/frames
+     * - encoding: Set by user.
+     * - decoding: Set by libavcodec.
+     */
+    int16_t position[3][2];
+}AVPanScan;
+#define FF_COMMON_FRAME \
+    /**\
+     * pointer to the picture planes.\
+     * This might be different from the first allocated byte\
+     * - encoding: \
+     * - decoding: \
+     */\
+    uint8_t *data[4];\
+    int linesize[4];\
+    /**\
+     * pointer to the first allocated byte of the picture. Can be used in get_buffer/release_buffer.\
+     * This isn't used by libavcodec unless the default get/release_buffer() is used.\
+     * - encoding: \
+     * - decoding: \
+     */\
+    uint8_t *base[4];\
+    /**\
+     * 1 -> keyframe, 0-> not\
+     * - encoding: Set by libavcodec.\
+     * - decoding: Set by libavcodec.\
+     */\
+    int key_frame;\
+\
+    /**\
+     * Picture type of the frame, see ?_TYPE below.\
+     * - encoding: Set by libavcodec. for coded_picture (and set by user for input).\
+     * - decoding: Set by libavcodec.\
+     */\
+    int pict_type;\
+\
+    /**\
+     * presentation timestamp in time_base units (time when frame should be shown to user)\
+     * If AV_NOPTS_VALUE then frame_rate = 1/time_base will be assumed.\
+     * - encoding: MUST be set by user.\
+     * - decoding: Set by libavcodec.\
+     */\
+    int64_t pts;\
+\
+    /**\
+     * picture number in bitstream order\
+     * - encoding: set by\
+     * - decoding: Set by libavcodec.\
+     */\
+    int coded_picture_number;\
+    /**\
+     * picture number in display order\
+     * - encoding: set by\
+     * - decoding: Set by libavcodec.\
+     */\
+    int display_picture_number;\
+\
+    /**\
+     * quality (between 1 (good) and FF_LAMBDA_MAX (bad)) \
+     * - encoding: Set by libavcodec. for coded_picture (and set by user for input).\
+     * - decoding: Set by libavcodec.\
+     */\
+    int quality; \
+\
+    /**\
+     * buffer age (1->was last buffer and didn't change, 2->..., ...).\
+     * Set to INT_MAX if the buffer has not been used yet.\
+     * - encoding: unused\
+     * - decoding: MUST be set by get_buffer().\
+     */\
+    int age;\
+\
+    /**\
+     * is this picture used as reference\
+     * The values for this are the same as the MpegEncContext.picture_structure\
+     * variable, that is 1->top field, 2->bottom field, 3->frame/both fields.\
+     * Set to 4 for delayed, non-reference frames.\
+     * - encoding: unused\
+     * - decoding: Set by libavcodec. (before get_buffer() call)).\
+     */\
+    int reference;\
+\
+    /**\
+     * QP table\
+     * - encoding: unused\
+     * - decoding: Set by libavcodec.\
+     */\
+    int8_t *qscale_table;\
+    /**\
+     * QP store stride\
+     * - encoding: unused\
+     * - decoding: Set by libavcodec.\
+     */\
+    int qstride;\
+\
+    /**\
+     * mbskip_table[mb]>=1 if MB didn't change\
+     * stride= mb_width = (width+15)>>4\
+     * - encoding: unused\
+     * - decoding: Set by libavcodec.\
+     */\
+    uint8_t *mbskip_table;\
+\
+    /**\
+     * motion vector table\
+     * @code\
+     * example:\
+     * int mv_sample_log2= 4 - motion_subsample_log2;\
+     * int mb_width= (width+15)>>4;\
+     * int mv_stride= (mb_width << mv_sample_log2) + 1;\
+     * motion_val[direction][x + y*mv_stride][0->mv_x, 1->mv_y];\
+     * @endcode\
+     * - encoding: Set by user.\
+     * - decoding: Set by libavcodec.\
+     */\
+    int16_t (*motion_val[2])[2];\
+\
+    /**\
+     * macroblock type table\
+     * mb_type_base + mb_width + 2\
+     * - encoding: Set by user.\
+     * - decoding: Set by libavcodec.\
+     */\
+    uint32_t *mb_type;\
+\
+    /**\
+     * log2 of the size of the block which a single vector in motion_val represents: \
+     * (4->16x16, 3->8x8, 2-> 4x4, 1-> 2x2)\
+     * - encoding: unused\
+     * - decoding: Set by libavcodec.\
+     */\
+    uint8_t motion_subsample_log2;\
+\
+    /**\
+     * for some private data of the user\
+     * - encoding: unused\
+     * - decoding: Set by user.\
+     */\
+    void *opaque;\
+\
+    /**\
+     * error\
+     * - encoding: Set by libavcodec. if flags&CODEC_FLAG_PSNR.\
+     * - decoding: unused\
+     */\
+    uint64_t error[4];\
+\
+    /**\
+     * type of the buffer (to keep track of who has to deallocate data[*])\
+     * - encoding: Set by the one who allocates it.\
+     * - decoding: Set by the one who allocates it.\
+     * Note: User allocated (direct rendering) & internal buffers cannot coexist currently.\
+     */\
+    int type;\
+    \
+    /**\
+     * When decoding, this signals how much the picture must be delayed.\
+     * extra_delay = repeat_pict / (2*fps)\
+     * - encoding: unused\
+     * - decoding: Set by libavcodec.\
+     */\
+    int repeat_pict;\
+    \
+    /**\
+     * \
+     */\
+    int qscale_type;\
+    \
+    /**\
+     * The content of the picture is interlaced.\
+     * - encoding: Set by user.\
+     * - decoding: Set by libavcodec. (default 0)\
+     */\
+    int interlaced_frame;\
+    \
+    /**\
+     * If the content is interlaced, is top field displayed first.\
+     * - encoding: Set by user.\
+     * - decoding: Set by libavcodec.\
+     */\
+    int top_field_first;\
+    \
+    /**\
+     * Pan scan.\
+     * - encoding: Set by user.\
+     * - decoding: Set by libavcodec.\
+     */\
+    AVPanScan *pan_scan;\
+    \
+    /**\
+     * Tell user application that palette has changed from previous frame.\
+     * - encoding: ??? (no palette-enabled encoder yet)\
+     * - decoding: Set by libavcodec. (default 0).\
+     */\
+    int palette_has_changed;\
+    \
+    /**\
+     * codec suggestion on buffer type if != 0\
+     * - encoding: unused\
+     * - decoding: Set by libavcodec. (before get_buffer() call)).\
+     */\
+    int buffer_hints;\
+\
+    /**\
+     * DCT coefficients\
+     * - encoding: unused\
+     * - decoding: Set by libavcodec.\
+     */\
+    short *dct_coeff;\
+\
+    /**\
+     * motion reference frame index\
+     * - encoding: Set by user.\
+     * - decoding: Set by libavcodec.\
+     */\
+    int8_t *ref_index[2];\
+\
+    /**\
+     * reordered opaque 64bit number (generally a PTS) from AVCodecContext.reordered_opaque\
+     * output in AVFrame.reordered_opaque\
+     * - encoding: unused\
+     * - decoding: Read by user.\
+     */\
+    int64_t reordered_opaque;\
+\
+    /**\
+     * hardware accelerator private data (FFmpeg allocated)\
+     * - encoding: unused\
+     * - decoding: Set by libavcodec\
+     */\
+    void *hwaccel_picture_private;\
+#define FF_QSCALE_TYPE_MPEG1 0
+#define FF_QSCALE_TYPE_MPEG2 1
+#define FF_QSCALE_TYPE_H264  2
+#define FF_BUFFER_TYPE_INTERNAL 1
+#define FF_BUFFER_TYPE_USER     2 ///< direct rendering buffers (image is (de)allocated by user)
+#define FF_BUFFER_TYPE_SHARED   4 ///< Buffer from somewhere else; don't deallocate image (data/base), all other tables are not shared.
+#define FF_BUFFER_TYPE_COPY     8 ///< Just a (modified) copy of some other buffer, don't deallocate anything.
+#define FF_I_TYPE  1 ///< Intra
+#define FF_P_TYPE  2 ///< Predicted
+#define FF_B_TYPE  3 ///< Bi-dir predicted
+#define FF_S_TYPE  4 ///< S(GMC)-VOP MPEG4
+#define FF_SI_TYPE 5 ///< Switching Intra
+#define FF_SP_TYPE 6 ///< Switching Predicted
+#define FF_BI_TYPE 7
+#define FF_BUFFER_HINTS_VALID    0x01 // Buffer hints value is meaningful (if 0 ignore).
+#define FF_BUFFER_HINTS_READABLE 0x02 // Codec will read from buffer.
+#define FF_BUFFER_HINTS_PRESERVE 0x04 // User must not alter buffer content.
+#define FF_BUFFER_HINTS_REUSABLE 0x08 // Codec will reuse the buffer (update).
+/**
+ * Audio Video Frame.
+ * New fields can be added to the end of FF_COMMON_FRAME with minor version
+ * bumps.
+ * Removal, reordering and changes to existing fields require a major
+ * version bump. No fields should be added into AVFrame before or after
+ * FF_COMMON_FRAME!
+ * sizeof(AVFrame) must not be used outside libav*.
+ */
+typedef struct AVFrame {
+    FF_COMMON_FRAME
+} AVFrame;
+#endif/* MT : DELETE THIS LINE.*/
+/**
+ * main external API structure.
+ * New fields can be added to the end with minor version bumps.
+ * Removal, reordering and changes to existing fields require a major
+ * version bump.
+ * sizeof(AVCodecContext) must not be used outside libav*.
+ */
+typedef struct AVCodecContext {
+    /**
+     * information on struct for av_log
+     * - set by avcodec_alloc_context
+     */
+    const AVClass *av_class;
+    /**
+     * the average bitrate
+     * - encoding: Set by user; unused for constant quantizer encoding.
+     * - decoding: Set by libavcodec. 0 or some bitrate if this info is available in the stream.
+     */
+    int bit_rate;
+    /**
+     * number of bits the bitstream is allowed to diverge from the reference.
+     *           the reference can be CBR (for CBR pass1) or VBR (for pass2)
+     * - encoding: Set by user; unused for constant quantizer encoding.
+     * - decoding: unused
+     */
+//MT:    int bit_rate_tolerance;
+    /**
+     * CODEC_FLAG_*.
+     * - encoding: Set by user.
+     * - decoding: Set by user.
+     */
+//MT:    int flags;
+    /**
+     * Some codecs need additional format info. It is stored here.
+     * If any muxer uses this then ALL demuxers/parsers AND encoders for the
+     * specific codec MUST set it correctly otherwise stream copy breaks.
+     * In general use of this field by muxers is not recommended.
+     * - encoding: Set by libavcodec.
+     * - decoding: Set by libavcodec. (FIXME: Is this OK?)
+     */
+//MT:    int sub_id;
+    /**
+     * Motion estimation algorithm used for video coding.
+     * 1 (zero), 2 (full), 3 (log), 4 (phods), 5 (epzs), 6 (x1), 7 (hex),
+     * 8 (umh), 9 (iter), 10 (tesa) [7, 8, 10 are x264 specific, 9 is snow specific]
+     * - encoding: MUST be set by user.
+     * - decoding: unused
+     */
+//MT:    int me_method;
+    /**
+     * some codecs need / can use extradata like Huffman tables.
+     * mjpeg: Huffman tables
+     * rv10: additional flags
+     * mpeg4: global headers (they can be in the bitstream or here)
+     * The allocated memory should be FF_INPUT_BUFFER_PADDING_SIZE bytes larger
+     * than extradata_size to avoid problems if it is read with the bitstream reader.
+     * The bytewise contents of extradata must not depend on the architecture or CPU endianness.
+     * - encoding: Set/allocated/freed by libavcodec.
+     * - decoding: Set/allocated/freed by user.
+     */
+    uint8_t *extradata;
+    int extradata_size;
+    /**
+     * This is the fundamental unit of time (in seconds) in terms
+     * of which frame timestamps are represented. For fixed-fps content,
+     * timebase should be 1/framerate and timestamp increments should be
+     * identically 1.
+     * - encoding: MUST be set by user.
+     * - decoding: Set by libavcodec.
+     */
+//MT:    AVRational time_base;
+    /* video only */
+    /**
+     * picture width / height.
+     * - encoding: MUST be set by user.
+     * - decoding: Set by libavcodec.
+     * Note: For compatibility it is possible to set this instead of
+     * coded_width/height before decoding.
+     */
+//MT:    int width, height;
+//MT:#define FF_ASPECT_EXTENDED 15
+    /**
+     * the number of pictures in a group of pictures, or 0 for intra_only
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int gop_size;
+    /**
+     * Pixel format, see PIX_FMT_xxx.
+     * - encoding: Set by user.
+     * - decoding: Set by libavcodec.
+     */
+ //MT:   enum PixelFormat pix_fmt;
+    /**
+     * Frame rate emulation. If not zero, the lower layer (i.e. format handler)
+     * has to read frames at native frame rate.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int rate_emu;
+    /**
+     * If non NULL, 'draw_horiz_band' is called by the libavcodec
+     * decoder to draw a horizontal band. It improves cache usage. Not
+     * all codecs can do that. You must check the codec capabilities
+     * beforehand.
+     * The function is also used by hardware acceleration APIs.
+     * It is called at least once during frame decoding to pass
+     * the data needed for hardware render.
+     * In that mode instead of pixel data, AVFrame points to
+     * a structure specific to the acceleration API. The application
+     * reads the structure and can change some fields to indicate progress
+     * or mark state.
+     * - encoding: unused
+     * - decoding: Set by user.
+     * @param height the height of the slice
+     * @param y the y position of the slice
+     * @param type 1->top field, 2->bottom field, 3->frame
+     * @param offset offset into the AVFrame.data from which the slice should be read
+     */
+ //MT:   void (*draw_horiz_band)(struct AVCodecContext *s,
+ //MT:                           const AVFrame *src, int offset[4],
+ //MT:                           int y, int type, int height);
+    /* audio only */
+    int sample_rate; ///< samples per second
+    int channels;    ///< number of audio channels
+    /**
+     * audio sample format
+     * - encoding: Set by user.
+     * - decoding: Set by libavcodec.
+     */
+    enum SampleFormat sample_fmt;  ///< sample format, currently unused
+    /* The following data should not be initialized. */
+    /**
+     * Samples per packet, initialized when calling 'init'.
+     */
+    int frame_size;
+    int frame_number;   ///< audio or video frame number
+//MT:    int real_pict_num;  ///< Returns the real picture number of previous encoded frame.
+    /**
+     * Number of frames the decoded output will be delayed relative to
+     * the encoded input.
+     * - encoding: Set by libavcodec.
+     * - decoding: unused
+     */
+//MT:    int delay;
+    /* - encoding parameters */
+//MT:    float qcompress;  ///< amount of qscale change between easy & hard scenes (0.0-1.0)
+//MT:    float qblur;      ///< amount of qscale smoothing over time (0.0-1.0)
+    /**
+     * minimum quantizer
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int qmin;
+    /**
+     * maximum quantizer
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int qmax;
+    /**
+     * maximum quantizer difference between frames
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int max_qdiff;
+    /**
+     * maximum number of B-frames between non-B-frames
+     * Note: The output will be delayed by max_b_frames+1 relative to the input.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int max_b_frames;
+    /**
+     * qscale factor between IP and B-frames
+     * If > 0 then the last P-frame quantizer will be used (q= lastp_q*factor+offset).
+     * If < 0 then normal ratecontrol will be done (q= -normal_q*factor+offset).
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    float b_quant_factor;
+    /** obsolete FIXME remove */
+//MT:    int rc_strategy;
+//MT:#define FF_RC_STRATEGY_XVID 1
+//MT:    int b_frame_strategy;
+    /**
+     * hurry up amount
+     * - encoding: unused
+     * - decoding: Set by user. 1-> Skip B-frames, 2-> Skip IDCT/dequant too, 5-> Skip everything except header
+     * @deprecated Deprecated in favor of skip_idct and skip_frame.
+     */
+//MT:    int hurry_up;
+    struct AVCodec *codec;
+    void *priv_data;
+//MT:    int rtp_payload_size;   /* The size of the RTP payload: the coder will  */
+                            /* do its best to deliver a chunk with size     */
+                            /* below rtp_payload_size, the chunk will start */
+                            /* with a start code on some codecs like H.263. */
+                            /* This doesn't take account of any particular  */
+                            /* headers inside the transmitted RTP payload.  */
+    /* The RTP callback: This function is called    */
+    /* every time the encoder has a packet to send. */
+    /* It depends on the encoder if the data starts */
+    /* with a Start Code (it should). H.263 does.   */
+    /* mb_nb contains the number of macroblocks     */
+    /* encoded in the RTP payload.                  */
+//MT:    void (*rtp_callback)(struct AVCodecContext *avctx, void *data, int size, int mb_nb);
+    /* statistics, used for 2-pass encoding */
+//MT:    int mv_bits;
+//MT:    int header_bits;
+//MT:    int i_tex_bits;
+ //MT:   int p_tex_bits;
+//MT:    int i_count;
+ //MT:   int p_count;
+ //MT:   int skip_count;
+ //MT:   int misc_bits;
+    /**
+     * number of bits used for the previously encoded frame
+     * - encoding: Set by libavcodec.
+     * - decoding: unused
+     */
+//MT:    int frame_bits;
+    /**
+     * Private data of the user, can be used to carry app specific stuff.
+     * - encoding: Set by user.
+     * - decoding: Set by user.
+     */
+//MT:    void *opaque;
+    char codec_name[32];
+    enum CodecType codec_type; /* see CODEC_TYPE_xxx */
+    enum CodecID codec_id; /* see CODEC_ID_xxx */
+    /**
+     * fourcc (LSB first, so "ABCD" -> ('D'<<24) + ('C'<<16) + ('B'<<8) + 'A').
+     * This is used to work around some encoder bugs.
+     * A demuxer should set this to what is stored in the field used to identify the codec.
+     * If there are multiple such fields in a container then the demuxer should choose the one
+     * which maximizes the information about the used codec.
+     * If the codec tag field in a container is larger than 32 bits then the demuxer should
+     * remap the longer ID to 32 bits with a table or other structure. Alternatively a new
+     * extra_codec_tag + size could be added but for this a clear advantage must be demonstrated
+     * first.
+     * - encoding: Set by user, if not then the default based on codec_id will be used.
+     * - decoding: Set by user, will be converted to uppercase by libavcodec during init.
+     */
+//MT:    unsigned int codec_tag;
+    /**
+     * Work around bugs in encoders which sometimes cannot be detected automatically.
+     * - encoding: Set by user
+     * - decoding: Set by user
+     */
+//MT:    int workaround_bugs;
+#define FF_BUG_AUTODETECT       1  ///< autodetection
+#define FF_BUG_OLD_MSMPEG4      2
+#define FF_BUG_XVID_ILACE       4
+#define FF_BUG_UMP4             8
+#define FF_BUG_NO_PADDING       16
+#define FF_BUG_AMV              32
+#define FF_BUG_AC_VLC           0  ///< Will be removed, libavcodec can now handle these non-compliant files by default.
+#define FF_BUG_QPEL_CHROMA      64
+#define FF_BUG_STD_QPEL         128
+#define FF_BUG_QPEL_CHROMA2     256
+#define FF_BUG_DIRECT_BLOCKSIZE 512
+#define FF_BUG_EDGE             1024
+#define FF_BUG_HPEL_CHROMA      2048
+#define FF_BUG_DC_CLIP          4096
+#define FF_BUG_MS               8192 ///< Work around various bugs in Microsoft's broken decoders.
+//#define FF_BUG_FAKE_SCALABILITY 16 //Autodetection should work 100%.
+    /**
+     * luma single coefficient elimination threshold
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int luma_elim_threshold;
+    /**
+     * chroma single coeff elimination threshold
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int chroma_elim_threshold;
+    /**
+     * strictly follow the standard (MPEG4, ...).
+     * - encoding: Set by user.
+     * - decoding: Set by user.
+     * Setting this to STRICT or higher means the encoder and decoder will
+     * generally do stupid things. While setting it to inofficial or lower
+     * will mean the encoder might use things that are not supported by all
+     * spec compliant decoders. Decoders make no difference between normal,
+     * inofficial and experimental, that is they always try to decode things
+     * when they can unless they are explicitly asked to behave stupidly
+     * (=strictly conform to the specs)
+     */
+//MT:    int strict_std_compliance;
+#define FF_COMPLIANCE_VERY_STRICT   2 ///< Strictly conform to a older more strict version of the spec or reference software.
+#define FF_COMPLIANCE_STRICT        1 ///< Strictly conform to all the things in the spec no matter what consequences.
+#define FF_COMPLIANCE_NORMAL        0
+#define FF_COMPLIANCE_INOFFICIAL   -1 ///< Allow inofficial extensions.
+#define FF_COMPLIANCE_EXPERIMENTAL -2 ///< Allow nonstandardized experimental things.
+    /**
+     * qscale offset between IP and B-frames
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    float b_quant_offset;
+    /**
+     * Error recognition; higher values will detect more errors but may
+     * misdetect some more or less valid parts as errors.
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+//MT:    int error_recognition;
+#define FF_ER_CAREFUL         1
+#define FF_ER_COMPLIANT       2
+#define FF_ER_AGGRESSIVE      3
+#define FF_ER_VERY_AGGRESSIVE 4
+    /**
+     * Called at the beginning of each frame to get a buffer for it.
+     * If pic.reference is set then the frame will be read later by libavcodec.
+     * avcodec_align_dimensions() should be used to find the required width and
+     * height, as they normally need to be rounded up to the next multiple of 16.
+     * - encoding: unused
+     * - decoding: Set by libavcodec, user can override.
+     */
+ //MT:   int (*get_buffer)(struct AVCodecContext *c, AVFrame *pic);
+    /**
+     * Called to release buffers which were allocated with get_buffer.
+     * A released buffer can be reused in get_buffer().
+     * pic.data[*] must be set to NULL.
+     * - encoding: unused
+     * - decoding: Set by libavcodec, user can override.
+     */
+//MT:    void (*release_buffer)(struct AVCodecContext *c, AVFrame *pic);
+    /**
+     * Size of the frame reordering buffer in the decoder.
+     * For MPEG-2 it is 1 IPB or 0 low delay IP.
+     * - encoding: Set by libavcodec.
+     * - decoding: Set by libavcodec.
+     */
+//MT:    int has_b_frames;
+    /**
+     * number of bytes per packet if constant and known or 0
+     * Used by some WAV based audio codecs.
+     */
+    int block_align;
+//MT:    int parse_only; /* - decoding only: If true, only parsing is done
+                       /*(function avcodec_parse_frame()). The frame
+                       data is returned. Only MPEG codecs support this now. */
+    /**
+     * 0-> h263 quant 1-> mpeg quant
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int mpeg_quant;
+    /**
+     * pass1 encoding statistics output buffer
+     * - encoding: Set by libavcodec.
+     * - decoding: unused
+     */
+//MT:    char *stats_out;
+    /**
+     * pass2 encoding statistics input buffer
+     * Concatenated stuff from stats_out of pass1 should be placed here.
+     * - encoding: Allocated/set/freed by user.
+     * - decoding: unused
+     */
+//MT:    char *stats_in;
+    /**
+     * ratecontrol qmin qmax limiting method
+     * 0-> clipping, 1-> use a nice continuous function to limit qscale within qmin/qmax.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    float rc_qsquish;
+//MT:    float rc_qmod_amp;
+ //MT:   int rc_qmod_freq;
+    /**
+     * ratecontrol override, see RcOverride
+     * - encoding: Allocated/set/freed by user.
+     * - decoding: unused
+     */
+ //MT:   RcOverride *rc_override;
+ //MT:   int rc_override_count;
+    /**
+     * rate control equation
+     * - encoding: Set by user
+     * - decoding: unused
+     */
+//MT:    const char *rc_eq;
+    /**
+     * maximum bitrate
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int rc_max_rate;
+    /**
+     * minimum bitrate
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int rc_min_rate;
+    /**
+     * decoder bitstream buffer size
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int rc_buffer_size;
+ //MT:   float rc_buffer_aggressivity;
+    /**
+     * qscale factor between P and I-frames
+     * If > 0 then the last p frame quantizer will be used (q= lastp_q*factor+offset).
+     * If < 0 then normal ratecontrol will be done (q= -normal_q*factor+offset).
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    float i_quant_factor;
+    /**
+     * qscale offset between P and I-frames
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    float i_quant_offset;
+    /**
+     * initial complexity for pass1 ratecontrol
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    float rc_initial_cplx;
+    /**
+     * DCT algorithm, see FF_DCT_* below
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int dct_algo;
+#define FF_DCT_AUTO    0
+#define FF_DCT_FASTINT 1
+#define FF_DCT_INT     2
+#define FF_DCT_MMX     3
+#define FF_DCT_MLIB    4
+#define FF_DCT_ALTIVEC 5
+#define FF_DCT_FAAN    6
+    /**
+     * luminance masking (0-> disabled)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   float lumi_masking;
+    /**
+     * temporal complexity masking (0-> disabled)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   float temporal_cplx_masking;
+    /**
+     * spatial complexity masking (0-> disabled)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   float spatial_cplx_masking;
+    /**
+     * p block masking (0-> disabled)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   float p_masking;
+    /**
+     * darkness masking (0-> disabled)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   float dark_masking;
+    /**
+     * IDCT algorithm, see FF_IDCT_* below.
+     * - encoding: Set by user.
+     * - decoding: Set by user.
+     */
+ //MT:   int idct_algo;
+#define FF_IDCT_AUTO          0
+#define FF_IDCT_INT           1
+#define FF_IDCT_SIMPLE        2
+#define FF_IDCT_SIMPLEMMX     3
+#define FF_IDCT_LIBMPEG2MMX   4
+#define FF_IDCT_PS2           5
+#define FF_IDCT_MLIB          6
+#define FF_IDCT_ARM           7
+#define FF_IDCT_ALTIVEC       8
+#define FF_IDCT_SH4           9
+#define FF_IDCT_SIMPLEARM     10
+#define FF_IDCT_H264          11
+#define FF_IDCT_VP3           12
+#define FF_IDCT_IPP           13
+#define FF_IDCT_XVIDMMX       14
+#define FF_IDCT_CAVS          15
+#define FF_IDCT_SIMPLEARMV5TE 16
+#define FF_IDCT_SIMPLEARMV6   17
+#define FF_IDCT_SIMPLEVIS     18
+#define FF_IDCT_WMV2          19
+#define FF_IDCT_FAAN          20
+#define FF_IDCT_EA            21
+#define FF_IDCT_SIMPLENEON    22
+#define FF_IDCT_SIMPLEALPHA   23
+    /**
+     * slice count
+     * - encoding: Set by libavcodec.
+     * - decoding: Set by user (or 0).
+     */
+//MT:    int slice_count;
+    /**
+     * slice offsets in the frame in bytes
+     * - encoding: Set/allocated by libavcodec.
+     * - decoding: Set/allocated by user (or NULL).
+     */
+ //MT:   int *slice_offset;
+    /**
+     * error concealment flags
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+//MT:    int error_concealment;
+#define FF_EC_GUESS_MVS   1
+#define FF_EC_DEBLOCK     2
+    /**
+     * dsp_mask can be used to disable unwanted
+     * CPU features (e.g. MMX, SSE, ...)
+     *
+     * With the FORCE flag you may instead enable given CPU features.
+     * (Dangerous: Usable in case of misdetection, improper usage however will
+     * result in a program crash.)
+     */
+ //MT:   unsigned dsp_mask;
+#define FF_MM_FORCE    0x80000000 /* Force usage of selected flags (OR) */
+    /* lower 16 bits - CPU features */
+#define FF_MM_MMX      0x0001 ///< standard MMX
+#define FF_MM_3DNOW    0x0004 ///< AMD 3DNOW
+#define FF_MM_MMXEXT   0x0002 ///< SSE integer functions or AMD MMX ext
+#define FF_MM_SSE      0x0008 ///< SSE functions
+#define FF_MM_SSE2     0x0010 ///< PIV SSE2 functions
+#define FF_MM_3DNOWEXT 0x0020 ///< AMD 3DNowExt
+#define FF_MM_SSE3     0x0040 ///< Prescott SSE3 functions
+#define FF_MM_SSSE3    0x0080 ///< Conroe SSSE3 functions
+#define FF_MM_IWMMXT   0x0100 ///< XScale IWMMXT
+#define FF_MM_ALTIVEC  0x0001 ///< standard AltiVec
+    /**
+     * bits per sample/pixel from the demuxer (needed for huffyuv).
+     * - encoding: Set by libavcodec.
+     * - decoding: Set by user.
+     */
+ //MT:    int bits_per_coded_sample;
+    /**
+     * prediction method (needed for huffyuv)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:    int prediction_method;
+#define FF_PRED_LEFT   0
+#define FF_PRED_PLANE  1
+#define FF_PRED_MEDIAN 2
+    /**
+     * sample aspect ratio (0 if unknown)
+     * That is the width of a pixel divided by the height of the pixel.
+     * Numerator and denominator must be relatively prime and smaller than 256 for some video standards.
+     * - encoding: Set by user.
+     * - decoding: Set by libavcodec.
+     */
+ //MT:   AVRational sample_aspect_ratio;
+    /**
+     * the picture in the bitstream
+     * - encoding: Set by libavcodec.
+     * - decoding: Set by libavcodec.
+     */
+ //MT:   AVFrame *coded_frame;
+    /**
+     * debug
+     * - encoding: Set by user.
+     * - decoding: Set by user.
+     */
+ //MT:   int debug;
+#define FF_DEBUG_PICT_INFO   1
+#define FF_DEBUG_RC          2
+#define FF_DEBUG_BITSTREAM   4
+#define FF_DEBUG_MB_TYPE     8
+#define FF_DEBUG_QP          16
+#define FF_DEBUG_MV          32
+#define FF_DEBUG_DCT_COEFF   0x00000040
+#define FF_DEBUG_SKIP        0x00000080
+#define FF_DEBUG_STARTCODE   0x00000100
+#define FF_DEBUG_PTS         0x00000200
+#define FF_DEBUG_ER          0x00000400
+#define FF_DEBUG_MMCO        0x00000800
+#define FF_DEBUG_BUGS        0x00001000
+#define FF_DEBUG_VIS_QP      0x00002000
+#define FF_DEBUG_VIS_MB_TYPE 0x00004000
+#define FF_DEBUG_BUFFERS     0x00008000
+    /**
+     * debug: motion vector visualization flags, see FF_DEBUG_VIS_MV_* below
+     * - encoding: Set by user.
+     * - decoding: Set by user.
+     */
+ //MT:   int debug_mv;
+#define FF_DEBUG_VIS_MV_P_FOR  0x00000001 //visualize forward predicted MVs of P frames
+#define FF_DEBUG_VIS_MV_B_FOR  0x00000002 //visualize forward predicted MVs of B frames
+#define FF_DEBUG_VIS_MV_B_BACK 0x00000004 //visualize backward predicted MVs of B frames
+    /**
+     * error
+     * - encoding: Set by libavcodec if flags&CODEC_FLAG_PSNR.
+     * - decoding: unused
+     */
+    uint64_t error[4];
+    /**
+     * minimum MB quantizer
+     * - encoding: unused
+     * - decoding: unused
+     */
+//MT:    int mb_qmin;
+    /**
+     * maximum MB quantizer
+     * - encoding: unused
+     * - decoding: unused
+     */
+//MT:    int mb_qmax;
+    /**
+     * motion estimation comparison function
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int me_cmp;
+    /**
+     * subpixel motion estimation comparison function
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int me_sub_cmp;
+    /**
+     * macroblock comparison function (not supported yet)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int mb_cmp;
+    /**
+     * interlaced DCT comparison function
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int ildct_cmp;
+#define FF_CMP_SAD    0
+#define FF_CMP_SSE    1
+#define FF_CMP_SATD   2
+#define FF_CMP_DCT    3
+#define FF_CMP_PSNR   4
+#define FF_CMP_BIT    5
+#define FF_CMP_RD     6
+#define FF_CMP_ZERO   7
+#define FF_CMP_VSAD   8
+#define FF_CMP_VSSE   9
+#define FF_CMP_NSSE   10
+#define FF_CMP_W53    11
+#define FF_CMP_W97    12
+#define FF_CMP_DCTMAX 13
+#define FF_CMP_DCT264 14
+#define FF_CMP_CHROMA 256
+    /**
+     * ME diamond size & shape
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int dia_size;
+    /**
+     * amount of previous MV predictors (2a+1 x 2a+1 square)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int last_predictor_count;
+    /**
+     * prepass for motion estimation
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int pre_me;
+    /**
+     * motion estimation prepass comparison function
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int me_pre_cmp;
+    /**
+     * ME prepass diamond size & shape
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int pre_dia_size;
+    /**
+     * subpel ME quality
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int me_subpel_quality;
+    /**
+     * callback to negotiate the pixelFormat
+     * @param fmt is the list of formats which are supported by the codec,
+     * it is terminated by -1 as 0 is a valid format, the formats are ordered by quality.
+     * The first is always the native one.
+     * @return the chosen format
+     * - encoding: unused
+     * - decoding: Set by user, if not set the native format will be chosen.
+     */
+ //MT:   enum PixelFormat (*get_format)(struct AVCodecContext *s, const enum PixelFormat * fmt);
+    /**
+     * DTG active format information (additional aspect ratio
+     * information only used in DVB MPEG-2 transport streams)
+     * 0 if not set.
+     *
+     * - encoding: unused
+     * - decoding: Set by decoder.
+     */
+ //MT:   int dtg_active_format;
+#define FF_DTG_AFD_SAME         8
+#define FF_DTG_AFD_4_3          9
+#define FF_DTG_AFD_16_9         10
+#define FF_DTG_AFD_14_9         11
+#define FF_DTG_AFD_4_3_SP_14_9  13
+#define FF_DTG_AFD_16_9_SP_14_9 14
+#define FF_DTG_AFD_SP_4_3       15
+    /**
+     * maximum motion estimation search range in subpel units
+     * If 0 then no limit.
+     *
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int me_range;
+    /**
+     * intra quantizer bias
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int intra_quant_bias;
+#define FF_DEFAULT_QUANT_BIAS 999999
+    /**
+     * inter quantizer bias
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int inter_quant_bias;
+    /**
+     * color table ID
+     * - encoding: unused
+     * - decoding: Which clrtable should be used for 8bit RGB images.
+     *             Tables have to be stored somewhere. FIXME
+     */
+ //MT:   int color_table_id;
+    /**
+     * internal_buffer count
+     * Don't touch, used by libavcodec default_get_buffer().
+     */
+ //MT:   int internal_buffer_count;
+    /**
+     * internal_buffers
+     * Don't touch, used by libavcodec default_get_buffer().
+     */
+   void *internal_buffer;
+#define FF_LAMBDA_SHIFT 7
+#define FF_LAMBDA_SCALE (1<<FF_LAMBDA_SHIFT)
+#define FF_QP2LAMBDA 118 ///< factor to convert from H.263 QP to lambda
+#define FF_LAMBDA_MAX (256*128-1)
+#define FF_QUALITY_SCALE FF_LAMBDA_SCALE //FIXME maybe remove
+    /**
+     * Global quality for codecs which cannot change it per frame.
+     * This should be proportional to MPEG-1/2/4 qscale.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int global_quality;
+#define FF_CODER_TYPE_VLC       0
+#define FF_CODER_TYPE_AC        1
+#define FF_CODER_TYPE_RAW       2
+#define FF_CODER_TYPE_RLE       3
+#define FF_CODER_TYPE_DEFLATE   4
+    /**
+     * coder type
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int coder_type;
+    /**
+     * context model
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int context_model;
+#if 0
+    /**
+     *
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+    uint8_t * (*realloc)(struct AVCodecContext *s, uint8_t *buf, int buf_size);
+#endif
+    /**
+     * slice flags
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+ //MT:   int slice_flags;
+#define SLICE_FLAG_CODED_ORDER    0x0001 ///< draw_horiz_band() is called in coded order instead of display
+#define SLICE_FLAG_ALLOW_FIELD    0x0002 ///< allow draw_horiz_band() with field slices (MPEG2 field pics)
+#define SLICE_FLAG_ALLOW_PLANE    0x0004 ///< allow draw_horiz_band() with 1 component at a time (SVQ1)
+    /**
+     * XVideo Motion Acceleration
+     * - encoding: forbidden
+     * - decoding: set by decoder
+     */
+//MT:    int xvmc_acceleration;
+    /**
+     * macroblock decision mode
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int mb_decision;
+#define FF_MB_DECISION_SIMPLE 0        ///< uses mb_cmp
+#define FF_MB_DECISION_BITS   1        ///< chooses the one which needs the fewest bits
+#define FF_MB_DECISION_RD     2        ///< rate distortion
+    /**
+     * custom intra quantization matrix
+     * - encoding: Set by user, can be NULL.
+     * - decoding: Set by libavcodec.
+     */
+ //MT:   uint16_t *intra_matrix;
+    /**
+     * custom inter quantization matrix
+     * - encoding: Set by user, can be NULL.
+     * - decoding: Set by libavcodec.
+     */
+ //MT:   uint16_t *inter_matrix;
+    /**
+     * fourcc from the AVI stream header (LSB first, so "ABCD" -> ('D'<<24) + ('C'<<16) + ('B'<<8) + 'A').
+     * This is used to work around some encoder bugs.
+     * - encoding: unused
+     * - decoding: Set by user, will be converted to uppercase by libavcodec during init.
+     */
+ //MT:   unsigned int stream_codec_tag;
+    /**
+     * scene change detection threshold
+     * 0 is default, larger means fewer detected scene changes.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int scenechange_threshold;
+    /**
+     * minimum Lagrange multiplier
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int lmin;
+    /**
+     * maximum Lagrange multiplier
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int lmax;
+    /**
+     * palette control structure
+     * - encoding: ??? (no palette-enabled encoder yet)
+     * - decoding: Set by user.
+     */
+ //MT:   struct AVPaletteControl *palctrl;
+    /**
+     * noise reduction strength
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int noise_reduction;
+    /**
+     * Called at the beginning of a frame to get cr buffer for it.
+     * Buffer type (size, hints) must be the same. libavcodec won't check it.
+     * libavcodec will pass previous buffer in pic, function should return
+     * same buffer or new buffer with old frame "painted" into it.
+     * If pic.data[0] == NULL must behave like get_buffer().
+     * - encoding: unused
+     * - decoding: Set by libavcodec, user can override.
+     */
+//MT:    int (*reget_buffer)(struct AVCodecContext *c, AVFrame *pic);
+    /**
+     * Number of bits which should be loaded into the rc buffer before decoding starts.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int rc_initial_buffer_occupancy;
+    /**
+     *
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int inter_threshold;
+    /**
+     * CODEC_FLAG2_*
+     * - encoding: Set by user.
+     * - decoding: Set by user.
+     */
+ //MT:   int flags2;
+    /**
+     * Simulates errors in the bitstream to test error concealment.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int error_rate;
+    /**
+     * MP3 antialias algorithm, see FF_AA_* below.
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+ //MT:   int antialias_algo;
+#define FF_AA_AUTO    0
+#define FF_AA_FASTINT 1 //not implemented yet
+#define FF_AA_INT     2
+#define FF_AA_FLOAT   3
+    /**
+     * quantizer noise shaping
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int quantizer_noise_shaping;
+    /**
+     * thread count
+     * is used to decide how many independent tasks should be passed to execute()
+     * - encoding: Set by user.
+     * - decoding: Set by user.
+     */
+ //MT:   int thread_count;
+    /**
+     * The codec may call this to execute several independent things.
+     * It will return only after finishing all tasks.
+     * The user may replace this with some multithreaded implementation,
+     * the default implementation will execute the parts serially.
+     * @param count the number of things to execute
+     * - encoding: Set by libavcodec, user can override.
+     * - decoding: Set by libavcodec, user can override.
+     */
+ //MT:   int (*execute)(struct AVCodecContext *c, int (*func)(struct AVCodecContext *c2, void *arg), void *arg2, int *ret, int count, int size);
+    /**
+     * thread opaque
+     * Can be used by execute() to store some per AVCodecContext stuff.
+     * - encoding: set by execute()
+     * - decoding: set by execute()
+     */
+ //MT:   void *thread_opaque;
+    /**
+     * Motion estimation threshold below which no motion estimation is
+     * performed, but instead the user specified motion vectors are used.
+     *
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:     int me_threshold;
+    /**
+     * Macroblock threshold below which the user specified macroblock types will be used.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:    int mb_threshold;
+    /**
+     * precision of the intra DC coefficient - 8
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:     int intra_dc_precision;
+    /**
+     * noise vs. sse weight for the nsse comparison function
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:     int nsse_weight;
+    /**
+     * Number of macroblock rows at the top which are skipped.
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+ //MT:    int skip_top;
+    /**
+     * Number of macroblock rows at the bottom which are skipped.
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+ //MT:    int skip_bottom;
+    /**
+     * profile
+     * - encoding: Set by user.
+     * - decoding: Set by libavcodec.
+     */
+ //MT:    int profile;
+#define FF_PROFILE_UNKNOWN -99
+#define FF_PROFILE_AAC_MAIN 0
+#define FF_PROFILE_AAC_LOW  1
+#define FF_PROFILE_AAC_SSR  2
+#define FF_PROFILE_AAC_LTP  3
+    /**
+     * level
+     * - encoding: Set by user.
+     * - decoding: Set by libavcodec.
+     */
+ //MT:    int level;
+#define FF_LEVEL_UNKNOWN -99
+    /**
+     * low resolution decoding, 1-> 1/2 size, 2->1/4 size
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+ //MT:    int lowres;
+    /**
+     * Bitstream width / height, may be different from width/height if lowres
+     * or other things are used.
+     * - encoding: unused
+     * - decoding: Set by user before init if known. Codec should override / dynamically change if needed.
+     */
+ //MT:   int coded_width, coded_height;
+    /**
+     * frame skip threshold
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int frame_skip_threshold;
+    /**
+     * frame skip factor
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int frame_skip_factor;
+    /**
+     * frame skip exponent
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int frame_skip_exp;
+    /**
+     * frame skip comparison function
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int frame_skip_cmp;
+    /**
+     * Border processing masking, raises the quantizer for mbs on the borders
+     * of the picture.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    float border_masking;
+    /**
+     * minimum MB lagrange multiplier
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int mb_lmin;
+    /**
+     * maximum MB lagrange multiplier
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int mb_lmax;
+    /**
+     *
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int me_penalty_compensation;
+    /**
+     *
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+ //MT:   enum AVDiscard skip_loop_filter;
+    /**
+     *
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+ //MT:   enum AVDiscard skip_idct;
+    /**
+     *
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+ //MT:   enum AVDiscard skip_frame;
+    /**
+     *
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int bidir_refine;
+    /**
+     *
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int brd_scale;
+    /**
+     * constant rate factor - quality-based VBR - values ~correspond to qps
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   float crf;
+    /**
+     * constant quantization parameter rate control method
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int cqp;
+    /**
+     * minimum GOP size
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int keyint_min;
+    /**
+     * number of reference frames
+     * - encoding: Set by user.
+     * - decoding: Set by lavc.
+     */
+ //MT:   int refs;
+    /**
+     * chroma qp offset from luma
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int chromaoffset;
+    /**
+     * Influences how often B-frames are used.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int bframebias;
+    /**
+     * trellis RD quantization
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int trellis;
+    /**
+     * Reduce fluctuations in qp (before curve compression).
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    float complexityblur;
+    /**
+     * in-loop deblocking filter alphac0 parameter
+     * alpha is in the range -6...6
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int deblockalpha;
+    /**
+     * in-loop deblocking filter beta parameter
+     * beta is in the range -6...6
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int deblockbeta;
+    /**
+     * macroblock subpartition sizes to consider - p8x8, p4x4, b8x8, i8x8, i4x4
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int partitions;
+#define X264_PART_I4X4 0x001  /* Analyze i4x4 */
+#define X264_PART_I8X8 0x002  /* Analyze i8x8 (requires 8x8 transform) */
+#define X264_PART_P8X8 0x010  /* Analyze p16x8, p8x16 and p8x8 */
+#define X264_PART_P4X4 0x020  /* Analyze p8x4, p4x8, p4x4 */
+#define X264_PART_B8X8 0x100  /* Analyze b16x8, b8x16 and b8x8 */
+    /**
+     * direct MV prediction mode - 0 (none), 1 (spatial), 2 (temporal), 3 (auto)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int directpred;
+    /**
+     * Audio cutoff bandwidth (0 means "automatic")
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int cutoff;
+    /**
+     * Multiplied by qscale for each frame and added to scene_change_score.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int scenechange_factor;
+    /**
+     *
+     * Note: Value depends upon the compare function used for fullpel ME.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int mv0_threshold;
+    /**
+     * Adjusts sensitivity of b_frame_strategy 1.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int b_sensitivity;
+    /**
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int compression_level;
+#define FF_COMPRESSION_DEFAULT -1
+    /**
+     * Sets whether to use LPC mode - used by FLAC encoder.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int use_lpc;
+    /**
+     * LPC coefficient precision - used by FLAC encoder
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int lpc_coeff_precision;
+    /**
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int min_prediction_order;
+    /**
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int max_prediction_order;
+    /**
+     * search method for selecting prediction order
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+//MT:    int prediction_order_method;
+    /**
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int min_partition_order;
+    /**
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+ //MT:   int max_partition_order;
+    /**
+     * GOP timecode frame start number, in non drop frame format
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+  //MT:  int64_t timecode_frame_start;
+#if LIBAVCODEC_VERSION_MAJOR < 53
+    /**
+     * Decoder should decode to this many channels if it can (0 for default)
+     * - encoding: unused
+     * - decoding: Set by user.
+     * @deprecated Deprecated in favor of request_channel_layout.
+     */
+ //MT:   int request_channels;
+#endif
+    /**
+     * Percentage of dynamic range compression to be applied by the decoder.
+     * The default value is 1.0, corresponding to full compression.
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+  //MT:  float drc_scale;
+    /**
+     * opaque 64bit number (generally a PTS) that will be reordered and
+     * output in AVFrame.reordered_opaque
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+ //MT:   int64_t reordered_opaque;
+    /**
+     * Bits per sample/pixel of internal libavcodec pixel/sample format.
+     * This field is applicable only when sample_fmt is SAMPLE_FMT_S32.
+     * - encoding: set by user.
+     * - decoding: set by libavcodec.
+     */
+ //MT:   int bits_per_raw_sample;
+    /**
+     * Audio channel layout.
+     * - encoding: set by user.
+     * - decoding: set by libavcodec.
+     */
+    int64_t channel_layout;
+    /**
+     * Request decoder to use this channel layout if it can (0 for default)
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+//MT:    int64_t request_channel_layout;
+    /**
+     * Ratecontrol attempt to use, at maximum, <value> of what can be used without an underflow.
+     * - encoding: Set by user.
+     * - decoding: unused.
+     */
+ //MT:   float rc_max_available_vbv_use;
+    /**
+     * Ratecontrol attempt to use, at least, <value> times the amount needed to prevent a vbv overflow.
+     * - encoding: Set by user.
+     * - decoding: unused.
+     */
+//MT:    float rc_min_vbv_overflow_use;
+    /**
+     * Hardware accelerator in use
+     * - encoding: unused.
+     * - decoding: Set by libavcodec
+     */
+//MT:    struct AVHWAccel *hwaccel;
+    /**
+     * For some codecs, the time base is closer to the field rate than the frame rate.
+     * Most notably, H.264 and MPEG-2 specify time_base as half of frame duration
+     * if no telecine is used ...
+     *
+     * Set to time_base ticks per frame. Default 1, e.g., H.264/MPEG-2 set it to 2.
+     */
+//MT:    int ticks_per_frame;
+    /**
+     * Hardware accelerator context.
+     * For some hardware accelerators, a global context needs to be
+     * provided by the user. In that case, this holds display-dependent
+     * data FFmpeg cannot instantiate itself. Please refer to the
+     * FFmpeg HW accelerator documentation to know how to fill this
+     * in, e.g. for VA API, this is a struct vaapi_context.
+     * - encoding: unused
+     * - decoding: Set by user
+     */
+ //MT:   void *hwaccel_context;
+} AVCodecContext;
+/**
+ * AVCodec: describes one codec implementation (identity, entry points, supported audio formats).
+ */
+typedef struct AVCodec {
+    /**
+     * Name of the codec implementation.
+     * The name is globally unique among encoders and among decoders (but an
+     * encoder and a decoder can share the same name).
+     * This is the primary way to find a codec from the user perspective.
+     */
+    const char *name;
+    enum CodecType type;                    ///< kind of codec, see enum CodecType
+    enum CodecID id;                        ///< codec identifier, see enum CodecID
+    int priv_data_size;                     ///< NOTE(review): presumably the byte size of the codec's private data - confirm
+    int (*init)(AVCodecContext *);
+    int (*encode)(AVCodecContext *, uint8_t *buf, int buf_size, void *data);
+    int (*close)(AVCodecContext *);
+    int (*decode)(AVCodecContext *, void *outdata, int *outdata_size,
+                  const uint8_t *buf, int buf_size);
+    /**
+     * Codec capabilities.
+     * see CODEC_CAP_* (the member itself is commented out below with //MT:)
+     */
+//MT:    int capabilities;
+//MT:    struct AVCodec *next;
+    /**
+     * Flush buffers.
+     * Will be called when seeking (the member itself is commented out below with //MT:)
+     */
+//MT:    void (*flush)(AVCodecContext *);
+ //MT:   const AVRational *supported_framerates; ///< array of supported framerates, or NULL if any, array is terminated by {0,0}
+ //MT:   const enum PixelFormat *pix_fmts;       ///< array of supported pixel formats, or NULL if unknown, array is terminated by -1
+    /**
+     * Descriptive name for the codec, meant to be more human readable than \p name.
+     * You \e should use the NULL_IF_CONFIG_SMALL() macro to define it.
+     */
+    const char *long_name;
+    const int *supported_samplerates;       ///< array of supported audio samplerates, or NULL if unknown, array is terminated by 0
+    const enum SampleFormat *sample_fmts;   ///< array of supported sample formats, or NULL if unknown, array is terminated by -1
+    const int64_t *channel_layouts;         ///< array of supported channel layouts, or NULL if unknown. array is terminated by 0
+} AVCodec;
+typedef struct AVPacket { /* One packet of stream data with its timestamps, duration and stream position. */
+    /**
+     * Presentation timestamp in AVStream->time_base units; the time at which
+     * the decompressed packet will be presented to the user.
+     * Can be AV_NOPTS_VALUE if it is not stored in the file.
+     * pts MUST be larger than or equal to dts as presentation cannot happen before
+     * decompression, unless one wants to view hex dumps. Some formats misuse
+     * the terms dts and pts/cts to mean something different. Such timestamps
+     * must be converted to true pts/dts before they are stored in AVPacket.
+     */
+    int64_t pts;
+    /**
+     * Decompression timestamp in AVStream->time_base units; the time at which
+     * the packet is decompressed.
+     * Can be AV_NOPTS_VALUE if it is not stored in the file.
+     */
+    int64_t dts;
+    uint8_t *data;                          ///< NOTE(review): presumably the packet payload - confirm
+    int   size;                             ///< NOTE(review): presumably the length of data in bytes - confirm
+    int   stream_index;
+    int   flags;
+    /**
+     * Duration of this packet in AVStream->time_base units, 0 if unknown.
+     * Equals next_pts - this_pts in presentation order.
+     */
+    int   duration;
+    void  (*destruct)(struct AVPacket *);   ///< NOTE(review): presumably releases the packet's resources - confirm
+    void  *priv;
+    int64_t pos;                            ///< byte position in stream, -1 if unknown
+    /**
+     * Time difference in AVStream->time_base units from the pts of this
+     * packet to the point at which the output from the decoder has converged
+     * independent from the availability of previous frames. That is, the
+     * frames are virtually identical no matter if decoding started from
+     * the very first frame or from this keyframe.
+     * Is AV_NOPTS_VALUE if unknown.
+     * This field is not the display duration of the current packet.
+     *
+     * The purpose of this field is to allow seeking in streams that have no
+     * keyframes in the conventional sense. It corresponds to the
+     * recovery point SEI in H.264 and match_time_delta in NUT. It is also
+     * essential for some types of subtitle streams to ensure that all
+     * subtitles are correctly displayed after seeking.
+     */
+    int64_t convergence_duration;
+} AVPacket;
+#if 0 /* MT : DELETE THIS LINE ONLY. */
+/**
+ * AVHWAccel: describes one hardware accelerator (codec type and ID, pixel format, per-frame callbacks).
+ */
+typedef struct AVHWAccel {
+    /**
+     * Name of the hardware accelerated codec.
+     * The name is globally unique among encoders and among decoders (but an
+     * encoder and a decoder can share the same name).
+     */
+    const char *name;
+    /**
+     * Type of codec implemented by the hardware accelerator.
+     *
+     * See CODEC_TYPE_xxx
+     */
+    enum CodecType type;
+    /**
+     * Codec implemented by the hardware accelerator.
+     *
+     * See CODEC_ID_xxx
+     */
+    enum CodecID id;
+    /**
+     * Supported pixel format.
+     *
+     * Only hardware accelerated formats are supported here.
+     */
+    enum PixelFormat pix_fmt;
+    /**
+     * Hardware accelerated codec capabilities.
+     * see FF_HWACCEL_CODEC_CAP_*
+     */
+    int capabilities;
+    struct AVHWAccel *next; ///< NOTE(review): presumably the next accelerator in a linked list - confirm
+    /**
+     * Called at the beginning of each frame or field picture.
+     *
+     * Meaningful frame information (codec specific) is guaranteed to
+     * be parsed at this point. This function is mandatory.
+     *
+     * Note that \p buf can be NULL along with \p buf_size set to 0.
+     * Otherwise, this means the whole frame is available at this point.
+     *
+     * @param avctx the codec context
+     * @param buf the frame data buffer base
+     * @param buf_size the size of the frame in bytes
+     * @return zero if successful, a negative value otherwise
+     */
+    int (*start_frame)(AVCodecContext *avctx, const uint8_t *buf, uint32_t buf_size);
+    /**
+     * Callback for each slice.
+     *
+     * Meaningful slice information (codec specific) is guaranteed to
+     * be parsed at this point. This function is mandatory.
+     *
+     * @param avctx the codec context
+     * @param buf the slice data buffer base
+     * @param buf_size the size of the slice in bytes
+     * @return zero if successful, a negative value otherwise
+     */
+    int (*decode_slice)(AVCodecContext *avctx, const uint8_t *buf, uint32_t buf_size);
+    /**
+     * Called at the end of each frame or field picture.
+     *
+     * The whole picture is parsed at this point and can now be sent
+     * to the hardware accelerator. This function is mandatory.
+     *
+     * @param avctx the codec context
+     * @return zero if successful, a negative value otherwise
+     */
+    int (*end_frame)(AVCodecContext *avctx);
+    /**
+     * Size of HW accelerator private data.
+     *
+     * Private data is allocated with av_mallocz() before
+     * AVCodecContext.get_buffer() and deallocated after
+     * AVCodecContext.release_buffer().
+     */
+    int priv_data_size;
+} AVHWAccel;
+/**
+ * Picture data: four components are given, that's all.
+ * The last component is alpha.
+ */
+typedef struct AVPicture {
+    uint8_t *data[4];      ///< one data pointer per component
+    int linesize[4];       ///< number of bytes per line
+} AVPicture;
+/**
+ * AVPaletteControl
+ * This structure defines a method for communicating palette changes
+ * between a demuxer and a decoder.
+ *
+ * @deprecated Use AVPacket to send palette changes instead.
+ * This is totally broken.
+ */
+#define AVPALETTE_SIZE 1024
+#define AVPALETTE_COUNT 256
+typedef struct AVPaletteControl {
+    /* Demuxer sets this to 1 to indicate the palette has changed;
+     * decoder resets to 0. */
+    int palette_changed;
+    /* 4-byte ARGB palette entries, stored in native byte order; note that
+     * the individual palette components should be on an 8-bit scale; if
+     * the palette data comes from an IBM VGA native format, the component
+     * data is probably 6 bits in size and needs to be scaled. */
+    unsigned int palette[AVPALETTE_COUNT];
+} AVPaletteControl attribute_deprecated;
+enum AVSubtitleType { /* Tells which representation of a subtitle rectangle is authoritative. */
+    SUBTITLE_NONE,
+    SUBTITLE_BITMAP,                ///< A bitmap, pict will be set
+    /**
+     * Plain text, the text field must be set by the decoder and is
+     * authoritative. The ass and pict fields may contain approximations.
+     */
+    SUBTITLE_TEXT,
+    /**
+     * Formatted text, the ass field must be set by the decoder and is
+     * authoritative. The pict and text fields may contain approximations.
+     */
+    SUBTITLE_ASS,
+};
+typedef struct AVSubtitleRect { /* One subtitle rectangle: bitmap geometry plus optional text/ASS forms. */
+    int x;         ///< top left corner  of pict, undefined when pict is not set
+    int y;         ///< top left corner  of pict, undefined when pict is not set
+    int w;         ///< width            of pict, undefined when pict is not set
+    int h;         ///< height           of pict, undefined when pict is not set
+    int nb_colors; ///< number of colors in pict, undefined when pict is not set
+    /**
+     * data+linesize for the bitmap of this subtitle.
+     * Can be set for text/ass as well once they were rendered.
+     */
+    AVPicture pict;
+    enum AVSubtitleType type;       ///< representation in use, see enum AVSubtitleType
+    char *text;                     ///< 0 terminated plain UTF-8 text
+    /**
+     * 0 terminated ASS/SSA compatible event line.
+     * The presentation of this is unaffected by the other values in this
+     * struct.
+     */
+    char *ass;
+} AVSubtitleRect;
+typedef struct AVSubtitle { /* A subtitle: display times plus an array of AVSubtitleRect pointers. */
+    uint16_t format; /* 0 = graphics */
+    uint32_t start_display_time; /* relative to packet pts, in ms */
+    uint32_t end_display_time; /* relative to packet pts, in ms */
+    unsigned num_rects; /* NOTE(review): presumably the number of entries in rects - confirm */
+    AVSubtitleRect **rects; /* NOTE(review): presumably an array of num_rects pointers - confirm */
+} AVSubtitle;
+/* resample.c */
+struct ReSampleContext;
+struct AVResampleContext;
+typedef struct ReSampleContext ReSampleContext;
+#if LIBAVCODEC_VERSION_MAJOR < 53
+/**
+ * @deprecated Use av_audio_resample_init() instead.
+ */
+attribute_deprecated ReSampleContext *audio_resample_init(int output_channels, int input_channels,
+                                                          int output_rate, int input_rate);
+#endif
+/**
+ *  Initializes audio resampling context
+ *
+ * @param output_channels  number of output channels
+ * @param input_channels   number of input channels
+ * @param output_rate      output sample rate
+ * @param input_rate       input sample rate
+ * @param sample_fmt_out   requested output sample format
+ * @param sample_fmt_in    input sample format
+ * @param filter_length    length of each FIR filter in the filterbank relative to the cutoff freq
+ * @param log2_phase_count log2 of the number of entries in the polyphase filterbank
+ * @param linear           If 1 then the used FIR filter will be linearly interpolated
+                           between the 2 closest, if 0 the closest will be used
+ * @param cutoff           cutoff frequency, 1.0 corresponds to half the output sampling rate
+ * @return allocated ReSampleContext, NULL if error occurred
+ */
+ReSampleContext *av_audio_resample_init(int output_channels, int input_channels,
+                                        int output_rate, int input_rate,
+                                        enum SampleFormat sample_fmt_out,
+                                        enum SampleFormat sample_fmt_in,
+                                        int filter_length, int log2_phase_count,
+                                        int linear, double cutoff);
+int audio_resample(ReSampleContext *s, short *output, short *input, int nb_samples);
+void audio_resample_close(ReSampleContext *s);
+/**
+ * Initializes an audio resampler.
+ * Note, if either rate is not an integer then simply scale both rates up so they are.
+ * @param filter_length length of each FIR filter in the filterbank relative to the cutoff freq
+ * @param log2_phase_count log2 of the number of entries in the polyphase filterbank
+ * @param linear If 1 then the used FIR filter will be linearly interpolated
+                 between the 2 closest, if 0 the closest will be used
+ * @param cutoff cutoff frequency, 1.0 corresponds to half the output sampling rate
+ */
+struct AVResampleContext *av_resample_init(int out_rate, int in_rate, int filter_length, int log2_phase_count, int linear, double cutoff);
+/**
+ * resamples.
+ * @param src an array of unconsumed samples
+ * @param consumed the number of samples of src which have been consumed are returned here
+ * @param src_size the number of unconsumed samples available
+ * @param dst_size the amount of space in samples available in dst
+ * @param update_ctx If this is 0 then the context will not be modified, that way several channels can be resampled with the same context.
+ * @return the number of samples written in dst or -1 if an error occurred
+ */
+int av_resample(struct AVResampleContext *c, short *dst, short *src, int *consumed, int src_size, int dst_size, int update_ctx);
+/**
+ * Compensates samplerate/timestamp drift. The compensation is done by changing
+ * the resampler parameters, so no audible clicks or similar distortions occur
+ * @param compensation_distance distance in output samples over which the compensation should be performed
+ * @param sample_delta number of output samples which should be output less
+ *
+ * example: av_resample_compensate(c, 10, 500)
+ * here instead of 510 samples only 500 samples would be output
+ *
+ * note, due to rounding the actual compensation might be slightly different,
+ * especially if the compensation_distance is large and the in_rate used during init is small
+ */
+void av_resample_compensate(struct AVResampleContext *c, int sample_delta, int compensation_distance);
+void av_resample_close(struct AVResampleContext *c);
+/**
+ * Allocate memory for a picture.  Call avpicture_free to free it.
+ *
+ * @param picture the picture to be filled in
+ * @param pix_fmt the format of the picture
+ * @param width the width of the picture
+ * @param height the height of the picture
+ * @return zero if successful, a negative value if not
+ */
+int avpicture_alloc(AVPicture *picture, int pix_fmt, int width, int height);
+/**
+ * Free a picture previously allocated by avpicture_alloc().
+ *
+ * @param picture the AVPicture to be freed
+ */
+void avpicture_free(AVPicture *picture);
+/**
+ * Fill in the AVPicture fields.
+ * The fields of the given AVPicture are filled in by using the 'ptr' address
+ * which points to the image data buffer. Depending on the specified picture
+ * format, one or multiple image data pointers and line sizes will be set.
+ * If a planar format is specified, several pointers will be set pointing to
+ * the different picture planes and the line sizes of the different planes
+ * will be stored in the linesize array.
+ *
+ * @param picture AVPicture whose fields are to be filled in
+ * @param ptr Buffer which will contain or contains the actual image data
+ * @param pix_fmt The format in which the picture data is stored.
+ * @param width the width of the image in pixels
+ * @param height the height of the image in pixels
+ * @return size of the image data in bytes
+ */
+int avpicture_fill(AVPicture *picture, uint8_t *ptr,
+                   int pix_fmt, int width, int height);
+int avpicture_layout(const AVPicture* src, int pix_fmt, int width, int height,
+                     unsigned char *dest, int dest_size);
+/**
+ * Calculate the size in bytes that a picture of the given width and height
+ * would occupy if stored in the given picture format.
+ *
+ * @param pix_fmt the given picture format
+ * @param width the width of the image
+ * @param height the height of the image
+ * @return Image data size in bytes
+ */
+int avpicture_get_size(int pix_fmt, int width, int height);
+void avcodec_get_chroma_sub_sample(int pix_fmt, int *h_shift, int *v_shift);
+const char *avcodec_get_pix_fmt_name(int pix_fmt);
+void avcodec_set_dimensions(AVCodecContext *s, int width, int height);
+enum PixelFormat avcodec_get_pix_fmt(const char* name);
+unsigned int avcodec_pix_fmt_to_codec_tag(enum PixelFormat p);
+#define FF_LOSS_RESOLUTION  0x0001 /**< loss due to resolution change */
+#define FF_LOSS_DEPTH       0x0002 /**< loss due to color depth change */
+#define FF_LOSS_COLORSPACE  0x0004 /**< loss due to color space conversion */
+#define FF_LOSS_ALPHA       0x0008 /**< loss of alpha bits */
+#define FF_LOSS_COLORQUANT  0x0010 /**< loss due to color quantization */
+#define FF_LOSS_CHROMA      0x0020 /**< loss of chroma (e.g. RGB to gray conversion) */
+/**
+ * Computes what kind of losses will occur when converting from one specific
+ * pixel format to another.
+ * When converting from one pixel format to another, information loss may occur.
+ * For example, when converting from RGB24 to GRAY, the color information will
+ * be lost. Similarly, other losses occur when converting from some formats to
+ * other formats. These losses can involve loss of chroma, but also loss of
+ * resolution, loss of color depth, loss due to the color space conversion, loss
+ * of the alpha bits or loss due to color quantization.
+ * avcodec_get_pix_fmt_loss() informs you about the various types of losses
+ * which will occur when converting from one pixel format to another.
+ *
+ * @param[in] dst_pix_fmt destination pixel format
+ * @param[in] src_pix_fmt source pixel format
+ * @param[in] has_alpha Whether the source pixel format alpha channel is used.
+ * @return Combination of flags informing you what kind of losses will occur.
+ */
+int avcodec_get_pix_fmt_loss(int dst_pix_fmt, int src_pix_fmt,
+                             int has_alpha);
+/**
+ * Finds the best pixel format to convert to given a certain source pixel
+ * format.  When converting from one pixel format to another, information loss
+ * may occur.  For example, when converting from RGB24 to GRAY, the color
+ * information will be lost. Similarly, other losses occur when converting from
+ * some formats to other formats. avcodec_find_best_pix_fmt() searches which of
+ * the given pixel formats should be used to suffer the least amount of loss.
+ * The pixel formats from which it chooses one, are determined by the
+ * \p pix_fmt_mask parameter.
+ *
+ * @code
+ * src_pix_fmt = PIX_FMT_YUV420P;
+ * pix_fmt_mask = (1 << PIX_FMT_YUV422P) | (1 << PIX_FMT_RGB24);
+ * dst_pix_fmt = avcodec_find_best_pix_fmt(pix_fmt_mask, src_pix_fmt, alpha, &loss);
+ * @endcode
+ *
+ * @param[in] pix_fmt_mask bitmask determining which pixel format to choose from
+ * @param[in] src_pix_fmt source pixel format
+ * @param[in] has_alpha Whether the source pixel format alpha channel is used.
+ * @param[out] loss_ptr Combination of flags informing you what kind of losses will occur.
+ * @return The best pixel format to convert to or -1 if none was found.
+ */
+int avcodec_find_best_pix_fmt(int64_t pix_fmt_mask, int src_pix_fmt,
+                              int has_alpha, int *loss_ptr);
+/**
+ * Print in buf the string corresponding to the pixel format with
+ * number pix_fmt, or a header if pix_fmt is negative.
+ *
+ * @param[in] buf the buffer where to write the string
+ * @param[in] buf_size the size of buf
+ * @param[in] pix_fmt the number of the pixel format to print the corresponding info string, or
+ * a negative value to print the corresponding header.
+ * Meaningful values for obtaining a pixel format info vary from 0 to PIX_FMT_NB -1.
+ */
+void avcodec_pix_fmt_string (char *buf, int buf_size, int pix_fmt);
+#define FF_ALPHA_TRANSP       0x0001 /* image has some totally transparent pixels */
+#define FF_ALPHA_SEMI_TRANSP  0x0002 /* image has some transparent pixels */
+/**
+ * Tell if an image really has transparent alpha values.
+ * @return ored mask of FF_ALPHA_xxx constants
+ */
+int img_get_alpha_info(const AVPicture *src,
+                       int pix_fmt, int width, int height);
+/* deinterlace a picture */
+/* deinterlace - if not supported return -1 */
+int avpicture_deinterlace(AVPicture *dst, const AVPicture *src,
+                          int pix_fmt, int width, int height);
+/* external high level API */
+/**
+ * If c is NULL, returns the first registered codec,
+ * if c is non-NULL, returns the next registered codec after c,
+ * or NULL if c is the last one.
+ */
+AVCodec *av_codec_next(AVCodec *c);
+/**
+ * Returns the LIBAVCODEC_VERSION_INT constant.
+ */
+unsigned avcodec_version(void);
+/**
+ * Initializes libavcodec.
+ *
+ * @warning This function \e must be called before any other libavcodec
+ * function.
+ */
+void avcodec_init(void);
+#if LIBAVCODEC_VERSION_MAJOR < 53
+/**
+ * @deprecated Deprecated in favor of avcodec_register().
+ */
+attribute_deprecated void register_avcodec(AVCodec *codec);
+#endif
+/**
+ * Register the codec \p codec and initialize libavcodec.
+ *
+ * @see avcodec_init()
+ */
+void avcodec_register(AVCodec *codec);
+/**
+ * Finds a registered encoder with a matching codec ID.
+ *
+ * @param id CodecID of the requested encoder
+ * @return An encoder if one was found, NULL otherwise.
+ */
+AVCodec *avcodec_find_encoder(enum CodecID id);
+/**
+ * Finds a registered encoder with the specified name.
+ *
+ * @param name name of the requested encoder
+ * @return An encoder if one was found, NULL otherwise.
+ */
+AVCodec *avcodec_find_encoder_by_name(const char *name);
+/**
+ * Finds a registered decoder with a matching codec ID.
+ *
+ * @param id CodecID of the requested decoder
+ * @return A decoder if one was found, NULL otherwise.
+ */
+AVCodec *avcodec_find_decoder(enum CodecID id);
+/**
+ * Finds a registered decoder with the specified name.
+ *
+ * @param name name of the requested decoder
+ * @return A decoder if one was found, NULL otherwise.
+ */
+AVCodec *avcodec_find_decoder_by_name(const char *name);
+void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode);
+/**
+ * Sets the fields of the given AVCodecContext to default values.
+ *
+ * @param s The AVCodecContext of which the fields should be set to default values.
+ */
+void avcodec_get_context_defaults(AVCodecContext *s);
+/** THIS FUNCTION IS NOT YET PART OF THE PUBLIC API!
+ *  we WILL change its arguments and name a few times! */
+void avcodec_get_context_defaults2(AVCodecContext *s, enum CodecType);
+/**
+ * Allocates an AVCodecContext and sets its fields to default values.  The
+ * resulting struct can be deallocated by simply calling av_free().
+ *
+ * @return An AVCodecContext filled with default values or NULL on failure.
+ * @see avcodec_get_context_defaults
+ */
+AVCodecContext *avcodec_alloc_context(void);
+/** THIS FUNCTION IS NOT YET PART OF THE PUBLIC API!
+ *  we WILL change its arguments and name a few times! */
+AVCodecContext *avcodec_alloc_context2(enum CodecType);
+/**
+ * Sets the fields of the given AVFrame to default values.
+ *
+ * @param pic The AVFrame of which the fields should be set to default values.
+ */
+void avcodec_get_frame_defaults(AVFrame *pic);
+/**
+ * Allocates an AVFrame and sets its fields to default values.  The resulting
+ * struct can be deallocated by simply calling av_free().
+ *
+ * @return An AVFrame filled with default values or NULL on failure.
+ * @see avcodec_get_frame_defaults
+ */
+AVFrame *avcodec_alloc_frame(void);
+int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic);
+void avcodec_default_release_buffer(AVCodecContext *s, AVFrame *pic);
+int avcodec_default_reget_buffer(AVCodecContext *s, AVFrame *pic);
+void avcodec_align_dimensions(AVCodecContext *s, int *width, int *height);
+/**
+ * Checks if the given dimension of a picture is valid, meaning that all
+ * bytes of the picture can be addressed with a signed int.
+ *
+ * @param[in] w Width of the picture.
+ * @param[in] h Height of the picture.
+ * @return Zero if valid, a negative value if invalid.
+ */
+int avcodec_check_dimensions(void *av_log_ctx, unsigned int w, unsigned int h);
+enum PixelFormat avcodec_default_get_format(struct AVCodecContext *s, const enum PixelFormat * fmt);
+int avcodec_thread_init(AVCodecContext *s, int thread_count);
+void avcodec_thread_free(AVCodecContext *s);
+int avcodec_thread_execute(AVCodecContext *s, int (*func)(AVCodecContext *c2, void *arg2),void *arg, int *ret, int count, int size);
+int avcodec_default_execute(AVCodecContext *c, int (*func)(AVCodecContext *c2, void *arg2),void *arg, int *ret, int count, int size);
+//FIXME func typedef
+/**
+ * Initializes the AVCodecContext to use the given AVCodec. Prior to using this
+ * function the context has to be allocated.
+ *
+ * The functions avcodec_find_decoder_by_name(), avcodec_find_encoder_by_name(),
+ * avcodec_find_decoder() and avcodec_find_encoder() provide an easy way for
+ * retrieving a codec.
+ *
+ * @warning This function is not thread safe!
+ *
+ * @code
+ * avcodec_register_all();
+ * codec = avcodec_find_decoder(CODEC_ID_H264);
+ * if (!codec)
+ *     exit(1);
+ *
+ * context = avcodec_alloc_context();
+ *
+ * if (avcodec_open(context, codec) < 0)
+ *     exit(1);
+ * @endcode
+ *
+ * @param avctx The context which will be set up to use the given codec.
+ * @param codec The codec to use within the context.
+ * @return zero on success, a negative value on error
+ * @see avcodec_alloc_context, avcodec_find_decoder, avcodec_find_encoder
+ */
+int avcodec_open(AVCodecContext *avctx, AVCodec *codec);
+/**
+ * Decodes an audio frame from \p buf into \p samples.
+ * The avcodec_decode_audio2() function decodes an audio frame from the input
+ * buffer \p buf of size \p buf_size. To decode it, it makes use of the
+ * audio codec which was coupled with \p avctx using avcodec_open(). The
+ * resulting decoded frame is stored in output buffer \p samples.  If no frame
+ * could be decompressed, \p frame_size_ptr is zero. Otherwise, it is the
+ * decompressed frame size in \e bytes.
+ *
+ * @warning You \e must set \p frame_size_ptr to the allocated size of the
+ * output buffer before calling avcodec_decode_audio2().
+ *
+ * @warning The input buffer must be \c FF_INPUT_BUFFER_PADDING_SIZE larger than
+ * the actual read bytes because some optimized bitstream readers read 32 or 64
+ * bits at once and could read over the end.
+ *
+ * @warning The end of the input buffer \p buf should be set to 0 to ensure that
+ * no overreading happens for damaged MPEG streams.
+ *
+ * @note You might have to align the input buffer \p buf and output buffer \p
+ * samples. The alignment requirements depend on the CPU: On some CPUs it isn't
+ * necessary at all, on others it won't work at all if not aligned and on others
+ * it will work but it will have an impact on performance. In practice, the
+ * bitstream should have 4 byte alignment at minimum and all sample data should
+ * be 16 byte aligned unless the CPU doesn't need it (AltiVec and SSE do). If
+ * the linesize is not a multiple of 16 then there's no sense in aligning the
+ * start of the buffer to 16.
+ *
+ * @param avctx the codec context
+ * @param[out] samples the output buffer
+ * @param[in,out] frame_size_ptr the output buffer size in bytes
+ * @param[in] buf the input buffer
+ * @param[in] buf_size the input buffer size in bytes
+ * @return On error a negative value is returned, otherwise the number of bytes
+ * used or zero if no frame could be decompressed.
+ */
+int avcodec_decode_audio2(AVCodecContext *avctx, int16_t *samples,
+                         int *frame_size_ptr,
+                         const uint8_t *buf, int buf_size);
+/**
+ * Decodes a video frame from \p buf into \p picture.
+ * The avcodec_decode_video() function decodes a video frame from the input
+ * buffer \p buf of size \p buf_size. To decode it, it makes use of the
+ * video codec which was coupled with \p avctx using avcodec_open(). The
+ * resulting decoded frame is stored in \p picture.
+ *
+ * @warning The input buffer must be \c FF_INPUT_BUFFER_PADDING_SIZE larger than
+ * the actual read bytes because some optimized bitstream readers read 32 or 64
+ * bits at once and could read over the end.
+ *
+ * @warning The end of the input buffer \p buf should be set to 0 to ensure that
+ * no overreading happens for damaged MPEG streams.
+ *
+ * @note You might have to align the input buffer \p buf and output buffer \p
+ * samples. The alignment requirements depend on the CPU: on some CPUs it isn't
+ * necessary at all, on others it won't work at all if not aligned and on others
+ * it will work but it will have an impact on performance. In practice, the
+ * bitstream should have 4 byte alignment at minimum and all sample data should
+ * be 16 byte aligned unless the CPU doesn't need it (AltiVec and SSE do). If
+ * the linesize is not a multiple of 16 then there's no sense in aligning the
+ * start of the buffer to 16.
+ *
+ * @note Some codecs have a delay between input and output; these need to be
+ * fed with buf=NULL, buf_size=0 at the end to return the remaining frames.
+ *
+ * @param avctx the codec context
+ * @param[out] picture The AVFrame in which the decoded video frame will be stored.
+ * @param[in] buf the input buffer
+ * @param[in] buf_size the size of the input buffer in bytes
+ * @param[in,out] got_picture_ptr Zero if no frame could be decompressed, otherwise, it is nonzero.
+ * @return On error a negative value is returned, otherwise the number of bytes
+ * used or zero if no frame could be decompressed.
+ */
+int avcodec_decode_video(AVCodecContext *avctx, AVFrame *picture,
+                         int *got_picture_ptr,
+                         const uint8_t *buf, int buf_size);
+/* Decode a subtitle message. Return -1 if error, otherwise return the
+ * number of bytes used. If no subtitle could be decompressed,
+ * got_sub_ptr is zero. Otherwise, the subtitle is stored in *sub. */
+int avcodec_decode_subtitle(AVCodecContext *avctx, AVSubtitle *sub,
+                            int *got_sub_ptr,
+                            const uint8_t *buf, int buf_size);
+int avcodec_parse_frame(AVCodecContext *avctx, uint8_t **pdata,
+                        int *data_size_ptr,
+                        uint8_t *buf, int buf_size);
+/**
+ * Encodes an audio frame from \p samples into \p buf.
+ * The avcodec_encode_audio() function encodes an audio frame from the input
+ * buffer \p samples. To encode it, it makes use of the audio codec which was
+ * coupled with \p avctx using avcodec_open(). The resulting encoded frame is
+ * stored in output buffer \p buf.
+ *
+ * @note The output buffer should be at least \c FF_MIN_BUFFER_SIZE bytes large.
+ *
+ * @param avctx the codec context
+ * @param[out] buf the output buffer
+ * @param[in] buf_size the output buffer size
+ * @param[in] samples the input buffer containing the samples
+ * The number of samples read from this buffer is frame_size*channels,
+ * both of which are defined in \p avctx.
+ * For PCM audio the number of samples read from \p samples is equal to
+ * \p buf_size * input_sample_size / output_sample_size.
+ * @return On error a negative value is returned, on success zero or the number
+ * of bytes used to encode the data read from the input buffer.
+ */
+int avcodec_encode_audio(AVCodecContext *avctx, uint8_t *buf, int buf_size,
+                         const short *samples);
+/**
+ * Encodes a video frame from \p pict into \p buf.
+ * The avcodec_encode_video() function encodes a video frame from the input
+ * \p pict. To encode it, it makes use of the video codec which was coupled with
+ * \p avctx using avcodec_open(). The resulting encoded bytes representing the
+ * frame are stored in the output buffer \p buf. The input picture should be
+ * stored using a specific format, namely \c avctx.pix_fmt.
+ *
+ * @param avctx the codec context
+ * @param[out] buf the output buffer for the bitstream of encoded frame
+ * @param[in] buf_size the size of the output buffer in bytes
+ * @param[in] pict the input picture to encode
+ * @return On error a negative value is returned, on success zero or the number
+ * of bytes used from the output buffer.
+ */
+int avcodec_encode_video(AVCodecContext *avctx, uint8_t *buf, int buf_size,
+                         const AVFrame *pict);
+int avcodec_encode_subtitle(AVCodecContext *avctx, uint8_t *buf, int buf_size,
+                            const AVSubtitle *sub);
+int avcodec_close(AVCodecContext *avctx);
+/**
+ * Register all the codecs, parsers and bitstream filters which were enabled at
+ * configuration time. If you do not call this function you can select exactly
+ * which formats you want to support, by using the individual registration
+ * functions.
+ *
+ * @see avcodec_register
+ * @see av_register_codec_parser
+ * @see av_register_bitstream_filter
+ */
+void avcodec_register_all(void);
+/**
+ * Flush buffers, should be called when seeking or when switching to a different stream.
+ */
+void avcodec_flush_buffers(AVCodecContext *avctx);
+void avcodec_default_free_buffers(AVCodecContext *s);
+/* misc useful functions */
+/**
+ * Returns a single letter to describe the given picture type \p pict_type.
+ *
+ * @param[in] pict_type the picture type
+ * @return A single character representing the picture type.
+ */
+char av_get_pict_type_char(int pict_type);
+/**
+ * Returns codec bits per sample.
+ *
+ * @param[in] codec_id the codec
+ * @return Number of bits per sample or zero if unknown for the given codec.
+ */
+int av_get_bits_per_sample(enum CodecID codec_id);
+/**
+ * Returns sample format bits per sample.
+ *
+ * @param[in] sample_fmt the sample format
+ * @return Number of bits per sample or zero if unknown for the given sample format.
+ */
+int av_get_bits_per_sample_format(enum SampleFormat sample_fmt);
+/* frame parsing */
+/**
+ * State carried by one parser instance: the AVCodecParser in use, its
+ * private data, and the offsets/timestamps of the frame being parsed.
+ */
+typedef struct AVCodecParserContext {
+    void *priv_data; /* parser-private state; presumably sized by AVCodecParser.priv_data_size -- TODO confirm */
+    struct AVCodecParser *parser; /* the parser implementation this context belongs to */
+    int64_t frame_offset; /* offset of the current frame */
+    int64_t cur_offset; /* current offset
+                           (incremented by each av_parser_parse()) */
+    int64_t next_frame_offset; /* offset of the next frame */
+    /* video info */
+    int pict_type; /* XXX: Put it back in AVCodecContext. */
+    /**
+     * This field is used for proper frame duration computation in lavf.
+     * It signals, how much longer the frame duration of the current frame
+     * is compared to normal frame duration.
+     *
+     * frame_duration = (1 + repeat_pict) * time_base
+     *
+     * It is used by codecs like H.264 to display telecined material.
+     */
+    int repeat_pict; /* XXX: Put it back in AVCodecContext. */
+    int64_t pts;     /* pts of the current frame */
+    int64_t dts;     /* dts of the current frame */
+    /* private data */
+    int64_t last_pts; /* presumably the pts of the previously returned frame -- TODO confirm */
+    int64_t last_dts; /* presumably the dts of the previously returned frame -- TODO confirm */
+    int fetch_timestamp; /* NOTE(review): usage not visible in this header */
+/* Number of entries in each of the cur_frame_* arrays below. */
+#define AV_PARSER_PTS_NB 4
+    int cur_frame_start_index; /* NOTE(review): looks like an index into the cur_frame_* arrays -- confirm */
+    int64_t cur_frame_offset[AV_PARSER_PTS_NB];
+    int64_t cur_frame_pts[AV_PARSER_PTS_NB];
+    int64_t cur_frame_dts[AV_PARSER_PTS_NB];
+    int flags; /* bitmask of PARSER_FLAG_* values; only one flag is defined here */
+#define PARSER_FLAG_COMPLETE_FRAMES           0x0001
+    int64_t offset;      ///< byte offset from starting packet start
+    int64_t cur_frame_end[AV_PARSER_PTS_NB];
+    /*!
+     * Set by parser to 1 for key frames and 0 for non-key frames.
+     * It is initialized to -1, so if the parser doesn't set this flag,
+     * old-style fallback using FF_I_TYPE picture type as key frames
+     * will be used.
+     */
+    int key_frame;
+    /**
+     * Time difference in stream time base units from the pts of this
+     * packet to the point at which the output from the decoder has converged
+     * independent from the availability of previous frames. That is, the
+     * frames are virtually identical no matter if decoding started from
+     * the very first frame or from this keyframe.
+     * Is AV_NOPTS_VALUE if unknown.
+     * This field is not the display duration of the current frame.
+     *
+     * The purpose of this field is to allow seeking in streams that have no
+     * keyframes in the conventional sense. It corresponds to the
+     * recovery point SEI in H.264 and match_time_delta in NUT. It is also
+     * essential for some types of subtitle streams to ensure that all
+     * subtitles are correctly displayed after seeking.
+     */
+    int64_t convergence_duration;
+    // Timestamp generation support:
+    /**
+     * Synchronization point for start of timestamp generation.
+     *
+     * Set to >0 for sync point, 0 for no sync point and <0 for undefined
+     * (default).
+     *
+     * For example, this corresponds to presence of H.264 buffering period
+     * SEI message.
+     */
+    int dts_sync_point;
+    /**
+     * Offset of the current timestamp against last timestamp sync point in
+     * units of AVCodecContext.time_base.
+     *
+     * Set to INT_MIN when dts_sync_point unused. Otherwise, it must
+     * contain a valid timestamp offset.
+     *
+     * Note that the timestamp of sync point has usually a nonzero
+     * dts_ref_dts_delta, which refers to the previous sync point. Offset of
+     * the next frame after timestamp sync point will be usually 1.
+     *
+     * For example, this corresponds to H.264 cpb_removal_delay.
+     */
+    int dts_ref_dts_delta;
+    /**
+     * Presentation delay of current frame in units of AVCodecContext.time_base.
+     *
+     * Set to INT_MIN when dts_sync_point unused. Otherwise, it must
+     * contain valid non-negative timestamp delta (presentation time of a frame
+     * must not lie in the past).
+     *
+     * This delay represents the difference between decoding and presentation
+     * time of the frame.
+     *
+     * For example, this corresponds to H.264 dpb_output_delay.
+     */
+    int pts_dts_delta;
+    /**
+     * Position of the packet in file.
+     *
+     * Analogous to cur_frame_pts/dts
+     */
+    int64_t cur_frame_pos[AV_PARSER_PTS_NB];
+    /**
+     * Byte position of currently parsed frame in stream.
+     */
+    int64_t pos;
+    /**
+     * Previous frame byte position.
+     */
+    int64_t last_pos;
+} AVCodecParserContext;
+/**
+ * Descriptor of a parser implementation: the codec IDs it accepts, the size
+ * of its private data, and its callbacks.
+ */
+typedef struct AVCodecParser {
+    int codec_ids[5]; /* several codec IDs are permitted */
+    int priv_data_size; /* presumably the size in bytes of AVCodecParserContext.priv_data -- TODO confirm */
+    /* Optional-looking setup hook; returns int (error convention not visible here). */
+    int (*parser_init)(AVCodecParserContext *s);
+    /* Parsing callback: consumes buf/buf_size and reports a frame through
+     * poutbuf/poutbuf_size. Exact return convention is not shown in this
+     * header -- presumably the number of input bytes used; confirm. */
+    int (*parser_parse)(AVCodecParserContext *s,
+                        AVCodecContext *avctx,
+                        const uint8_t **poutbuf, int *poutbuf_size,
+                        const uint8_t *buf, int buf_size);
+    /* Teardown hook for the parser's private state. */
+    void (*parser_close)(AVCodecParserContext *s);
+    /* NOTE(review): semantics of split() are not documented here -- confirm against implementations. */
+    int (*split)(AVCodecContext *avctx, const uint8_t *buf, int buf_size);
+    struct AVCodecParser *next; /* link to another parser; presumably the list walked by av_parser_next() -- confirm */
+} AVCodecParser;
+AVCodecParser *av_parser_next(AVCodecParser *c);
+void av_register_codec_parser(AVCodecParser *parser);
+AVCodecParserContext *av_parser_init(int codec_id);
+attribute_deprecated
+int av_parser_parse(AVCodecParserContext *s,
+                    AVCodecContext *avctx,
+                    uint8_t **poutbuf, int *poutbuf_size,
+                    const uint8_t *buf, int buf_size,
+                    int64_t pts, int64_t dts);
+/**
+ * Parse a packet.
+ *
+ * @param s             parser context.
+ * @param avctx         codec context.
+ * @param poutbuf       set to pointer to parsed buffer or NULL if not yet finished.
+ * @param poutbuf_size  set to size of parsed buffer or zero if not yet finished.
+ * @param buf           input buffer.
+ * @param buf_size      input length, to signal EOF, this should be 0 (so that the last frame can be output).
+ * @param pts           input presentation timestamp.
+ * @param dts           input decoding timestamp.
+ * @param pos           input byte position in stream.
+ * @return the number of bytes of the input bitstream used.
+ *
+ * Example:
+ * @code
+ *   while(in_len){
+ *       len = av_parser_parse2(myparser, AVCodecContext, &data, &size,
+ *                                        in_data, in_len,
+ *                                        pts, dts, pos);
+ *       in_data += len;
+ *       in_len  -= len;
+ *
+ *       if(size)
+ *          decode_frame(data, size);
+ *   }
+ * @endcode
+ */
+int av_parser_parse2(AVCodecParserContext *s,
+                     AVCodecContext *avctx,
+                     uint8_t **poutbuf, int *poutbuf_size,
+                     const uint8_t *buf, int buf_size,
+                     int64_t pts, int64_t dts,
+                     int64_t pos);
+int av_parser_change(AVCodecParserContext *s,
+                     AVCodecContext *avctx,
+                     uint8_t **poutbuf, int *poutbuf_size,
+                     const uint8_t *buf, int buf_size, int keyframe);
+void av_parser_close(AVCodecParserContext *s);
+/**
+ * State of one bitstream filter instance, as passed to the callbacks of
+ * AVBitStreamFilter.
+ */
+typedef struct AVBitStreamFilterContext {
+    void *priv_data; /* filter-private state; presumably sized by AVBitStreamFilter.priv_data_size -- TODO confirm */
+    struct AVBitStreamFilter *filter; /* the filter implementation this context belongs to */
+    AVCodecParserContext *parser; /* NOTE(review): ownership and lifetime of this parser are not visible here */
+    struct AVBitStreamFilterContext *next; /* link to another context; presumably a filter chain -- confirm */
+} AVBitStreamFilterContext;
+/**
+ * Descriptor of a bitstream filter implementation: its name, the size of its
+ * private data, and its callbacks.
+ */
+typedef struct AVBitStreamFilter {
+    const char *name; /* presumably the name matched by av_bitstream_filter_init() -- confirm */
+    int priv_data_size; /* presumably the size in bytes of AVBitStreamFilterContext.priv_data -- TODO confirm */
+    /* Filtering callback: takes buf/buf_size and reports its result through
+     * poutbuf/poutbuf_size. Return convention is not shown in this header. */
+    int (*filter)(AVBitStreamFilterContext *bsfc,
+                  AVCodecContext *avctx, const char *args,
+                  uint8_t **poutbuf, int *poutbuf_size,
+                  const uint8_t *buf, int buf_size, int keyframe);
+    /* Teardown hook for the filter's private state. */
+    void (*close)(AVBitStreamFilterContext *bsfc);
+    struct AVBitStreamFilter *next; /* link to another filter; presumably the list walked by av_bitstream_filter_next() -- confirm */
+} AVBitStreamFilter;
+void av_register_bitstream_filter(AVBitStreamFilter *bsf);
+AVBitStreamFilterContext *av_bitstream_filter_init(const char *name);
+int av_bitstream_filter_filter(AVBitStreamFilterContext *bsfc,
+                               AVCodecContext *avctx, const char *args,
+                               uint8_t **poutbuf, int *poutbuf_size,
+                               const uint8_t *buf, int buf_size, int keyframe);
+void av_bitstream_filter_close(AVBitStreamFilterContext *bsf);
+AVBitStreamFilter *av_bitstream_filter_next(AVBitStreamFilter *f);
+/* memory */
+/**
+ * Reallocates the given block if it is not large enough, otherwise it
+ * does nothing.
+ *
+ * @see av_realloc
+ */
+void *av_fast_realloc(void *ptr, unsigned int *size, unsigned int min_size);
+/**
+ * Copy image 'src' to 'dst'.
+ */
+void av_picture_copy(AVPicture *dst, const AVPicture *src,
+              int pix_fmt, int width, int height);
+/**
+ * Crop image top and left side.
+ */
+int av_picture_crop(AVPicture *dst, const AVPicture *src,
+             int pix_fmt, int top_band, int left_band);
+/**
+ * Pad image.
+ */
+int av_picture_pad(AVPicture *dst, const AVPicture *src, int height, int width, int pix_fmt,
+            int padtop, int padbottom, int padleft, int padright, int *color);
+unsigned int av_xiphlacing(unsigned char *s, unsigned int v);
+/**
+ * Parses \p str and puts the detected values in \p width_ptr and \p height_ptr.
+ *
+ * @return 0 in case of a successful parsing, a negative value otherwise
+ * @param[in] str the string to parse: it has to be a string in the format
+ * <width>x<height> or a valid video frame size abbreviation.
+ * @param[in,out] width_ptr pointer to the variable which will contain the detected
+ * frame width value
+ * @param[in,out] height_ptr pointer to the variable which will contain the detected
+ * frame height value
+ */
+int av_parse_video_frame_size(int *width_ptr, int *height_ptr, const char *str);
+/**
+ * Parses \p str and puts the detected values in \p frame_rate.
+ *
+ * @return 0 in case of a successful parsing, a negative value otherwise
+ * @param[in] str the string to parse: it has to be a string in the format
+ * <frame_rate_num>/<frame_rate_den>, a float number or a valid video rate abbreviation
+ * @param[in,out] frame_rate pointer to the AVRational which will contain the detected
+ * frame rate
+ */
+int av_parse_video_frame_rate(AVRational *frame_rate, const char *str);
+/* error handling */
+/*
+ * AVERROR() maps a POSIX errno value to the negative form returned by the
+ * library, and AVUNERROR() maps it back. On platforms whose E* constants are
+ * already negative (EINVAL <= 0) both are the identity.
+ */
+#if EINVAL > 0
+#define AVERROR(e) (-(e)) /**< Returns a negative error code from a POSIX error code, to return from library functions. */
+#define AVUNERROR(e) (-(e)) /**< Returns a POSIX error code from a library function error return value. */
+#else
+/* Some platforms have E* and errno already negated. */
+#define AVERROR(e) (e)
+#define AVUNERROR(e) (e)
+#endif
+/* Note: AVERROR_UNKNOWN and AVERROR_INVALIDDATA both expand to
+ * AVERROR(EINVAL), so callers cannot tell them apart by value. */
+#define AVERROR_UNKNOWN     AVERROR(EINVAL)  /**< unknown error */
+#define AVERROR_IO          AVERROR(EIO)     /**< I/O error */
+#define AVERROR_NUMEXPECTED AVERROR(EDOM)    /**< Number syntax expected in filename. */
+#define AVERROR_INVALIDDATA AVERROR(EINVAL)  /**< invalid data found */
+#define AVERROR_NOMEM       AVERROR(ENOMEM)  /**< not enough memory */
+#define AVERROR_NOFMT       AVERROR(EILSEQ)  /**< unknown format */
+#define AVERROR_NOTSUPP     AVERROR(ENOSYS)  /**< Operation not supported. */
+#define AVERROR_NOENT       AVERROR(ENOENT)  /**< No such file or directory. */
+#define AVERROR_EOF         AVERROR(EPIPE)   /**< End of file. */
+/* Parenthesized so the leading minus cannot bind to neighbouring tokens at
+ * the point of use. */
+#define AVERROR_PATCHWELCOME    (-MKTAG('P','A','W','E')) /**< Not yet implemented in FFmpeg. Patches welcome. */
+/**
+ * Registers the hardware accelerator \p hwaccel.
+ */
+void av_register_hwaccel(AVHWAccel *hwaccel);
+/**
+ * If hwaccel is NULL, returns the first registered hardware accelerator,
+ * if hwaccel is non-NULL, returns the next registered hardware accelerator
+ * after hwaccel, or NULL if hwaccel is the last one.
+ */
+AVHWAccel *av_hwaccel_next(AVHWAccel *hwaccel);
+#endif /* MT : DELETE THIS LINE ONLY. */
+#endif /* AVCODEC_AVCODEC_H */
diff --git a/apps/codecs/libwmapro/avfft.h b/apps/codecs/libwmapro/avfft.h
new file mode 100644
index 0000000000..623f0a33b5
--- /dev/null
+++ b/apps/codecs/libwmapro/avfft.h
@@ -0,0 +1,99 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef AVCODEC_AVFFT_H
+#define AVCODEC_AVFFT_H
+/** Scalar sample type used by every transform declared in this header. */
+typedef float FFTSample;
+/** Complex sample; \c re and \c im are the real and imaginary parts. */
+typedef struct FFTComplex {
+    FFTSample re, im;
+} FFTComplex;
+/** Opaque transform context; its layout is not defined in this header. */
+typedef struct FFTContext FFTContext;
+/**
+ * Set up a complex FFT.
+ * @param nbits           log2 of the length of the input array
+ * @param inverse         if 0 perform the forward transform, if 1 perform the inverse
+ */
+FFTContext *av_fft_init(int nbits, int inverse);
+/**
+ * Do the permutation needed BEFORE calling av_fft_calc().
+ */
+void av_fft_permute(FFTContext *s, FFTComplex *z);
+/**
+ * Do a complex FFT with the parameters defined in av_fft_init(). The
+ * input data must be permuted before. No 1.0/sqrt(n) normalization is done.
+ */
+void av_fft_calc(FFTContext *s, FFTComplex *z);
+void av_fft_end(FFTContext *s);
+FFTContext *av_mdct_init(int nbits, int inverse, double scale);
+void av_imdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input);
+void av_imdct_half(FFTContext *s, FFTSample *output, const FFTSample *input);
+void av_mdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input);
+void av_mdct_end(FFTContext *s);
+/* Real Discrete Fourier Transform */
+/**
+ * Transform selector passed to av_rdft_init(). The enumerators take the
+ * implicit values 0..3 in declaration order.
+ * NOTE(review): presumably R2C means real-to-complex and C2R
+ * complex-to-real, with the DFT_/IDFT_ prefix giving the direction --
+ * confirm against the implementation.
+ */
+enum RDFTransformType {
+    DFT_R2C,
+    IDFT_C2R,
+    IDFT_R2C,
+    DFT_C2R,
+};
+typedef struct RDFTContext RDFTContext;
+/**
+ * Set up a real FFT.
+ * @param nbits           log2 of the length of the input array
+ * @param trans           the type of transform
+ */
+RDFTContext *av_rdft_init(int nbits, enum RDFTransformType trans);
+void av_rdft_calc(RDFTContext *s, FFTSample *data);
+void av_rdft_end(RDFTContext *s);
+/* Discrete Cosine Transform */
+typedef struct DCTContext DCTContext;
+/**
+ * Transform selector passed to av_dct_init(): DCT-II, DCT-III, DCT-I or
+ * DST-I. Numbering starts at DCT_II = 0 and continues in declaration order.
+ */
+enum DCTTransformType {
+    DCT_II = 0,
+    DCT_III,
+    DCT_I,
+    DST_I,
+};
+/**
+ * Sets up DCT.
+ * @param nbits           size of the input array:
+ *                        (1 << nbits)     for DCT-II, DCT-III and DST-I
+ *                        (1 << nbits) + 1 for DCT-I
+ *
+ * @note the first element of the input of DST-I is ignored
+ */
+DCTContext *av_dct_init(int nbits, enum DCTTransformType type);
+void av_dct_calc(DCTContext *s, FFTSample *data);
+void av_dct_end (DCTContext *s);
+#endif /* AVCODEC_AVFFT_H */
diff --git a/apps/codecs/libwmapro/bitstream.c b/apps/codecs/libwmapro/bitstream.c
new file mode 100644
index 0000000000..a149ff0289
--- /dev/null
+++ b/apps/codecs/libwmapro/bitstream.c
@@ -0,0 +1,338 @@
+/*
+ * Common bit i/o utils
+ * Copyright (c) 2000, 2001 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2010 Loren Merritt
+ *
+ * alternative bitstream reader & writer by Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/**
+ * @file libavcodec/bitstream.c
+ * bitstream api.
+ */
+#include "avcodec.h"
+#include "get_bits.h"
+#include "put_bits.h"
+#define CONFIG_SMALL 0 // added to make it compile
+/*
+ * 32-entry lookup table of non-decreasing values in the range 0..15.
+ * NOTE(review): presumably the log2 of a run length indexed by a run-state
+ * counter, as the name suggests -- no user is visible in this file section.
+ */
+const uint8_t ff_log2_run[32]={
+ 0, 0, 0, 0, 1, 1, 1, 1,
+ 2, 2, 2, 2, 3, 3, 3, 3,
+ 4, 4, 5, 5, 6, 6, 7, 7,
+ 8, 9,10,11,12,13,14,15
+};
+/**
+ * Writes 0..7 bits of value 0 to the bit writer \p s. The count is taken
+ * from the low three bits of the writer position (-index with the
+ * alternative writer, bit_left otherwise).
+ */
+void align_put_bits(PutBitContext *s)
+{
+#ifdef ALT_BITSTREAM_WRITER
+    const int pad_bits = (-s->index) & 7;
+#else
+    const int pad_bits = s->bit_left & 7;
+#endif
+    put_bits(s, pad_bits, 0);
+}
+/**
+ * Writes the bytes of the NUL-terminated \p string to \p pb, 8 bits each.
+ *
+ * @param pb               destination bit writer
+ * @param string           NUL-terminated string to emit
+ * @param terminate_string if nonzero, a trailing zero byte is written too
+ */
+void ff_put_string(PutBitContext *pb, const char *string, int terminate_string)
+{
+    while(*string){
+        /* Plain char may be signed: cast so bytes >= 0x80 are passed as
+         * 0..255 instead of a sign-extended negative value. */
+        put_bits(pb, 8, (uint8_t)*string);
+        string++;
+    }
+    if(terminate_string)
+        put_bits(pb, 8, 0);
+}
+/**
+ * Appends \p length bits taken from \p src to the bit writer \p pb.
+ *
+ * @param pb     destination bit writer
+ * @param src    source bytes, consumed 16 bits at a time via AV_RB16()
+ * @param length number of bits to copy; 0 returns immediately
+ *
+ * NOTE(review): the final AV_RB16(src + 2*words) is evaluated whatever the
+ * leftover bit count is, so it looks like up to two bytes beyond the last
+ * needed one must be readable -- confirm against callers.
+ */
+void ff_copy_bits(PutBitContext *pb, const uint8_t *src, int length)
+{
+    int words= length>>4;   /* number of whole 16-bit units */
+    int bits= length&15;    /* leftover bits after the whole units */
+    int i;
+    if(length==0) return;
+    /* Word-by-word path: taken for short inputs or when the writer's bit
+     * count is not a multiple of 8. */
+    if(CONFIG_SMALL || words < 16 || put_bits_count(pb)&7){
+        for(i=0; i<words; i++) put_bits(pb, 16, AV_RB16(src + 2*i));
+    }else{
+        /* Bulk path: emit single bytes until the bit count is a multiple of
+         * 32, flush, then copy the remaining whole bytes with memcpy. */
+        for(i=0; put_bits_count(pb)&31; i++)
+            put_bits(pb, 8, src[i]);
+        flush_put_bits(pb);
+        memcpy(put_bits_ptr(pb), src+i, 2*words-i);
+        skip_put_bytes(pb, 2*words-i);
+    }
+    /* Leftover bits come from the top of the next 16-bit unit. */
+    put_bits(pb, bits, AV_RB16(src + 2*words)>>(16-bits));
+}
+/* VLC decoding */
+//#define DEBUG_VLC
+/*
+ * Loads entry 'i' of a strided table into 'v'.
+ *   table : base address of the table
+ *   i     : entry index
+ *   wrap  : distance in bytes between consecutive entries
+ *   size  : width of one entry in bytes; 1 and 2 read uint8_t/uint16_t,
+ *           any other value reads uint32_t
+ * Arguments are parenthesized so that expressions can be passed safely. The
+ * expansion stays a plain brace block, so existing call sites are unchanged.
+ */
+#define GET_DATA(v, table, i, wrap, size) \
+{\
+    const uint8_t *ptr = (const uint8_t *)(table) + (i) * (wrap);\
+    switch(size) {\
+    case 1:\
+        (v) = *(const uint8_t *)ptr;\
+        break;\
+    case 2:\
+        (v) = *(const uint16_t *)ptr;\
+        break;\
+    default:\
+        (v) = *(const uint32_t *)ptr;\
+        break;\
+    }\
+}
+/**
+ * Reserves \p size entries at the end of vlc->table.
+ *
+ * @param vlc        table owner; table_size is always advanced by \p size
+ * @param size       number of entries to reserve
+ * @param use_static if nonzero the table must not grow: abort() is called
+ *                   when the reservation does not fit
+ * @return index of the first reserved entry, or -1 if growing the table
+ *         failed (vlc->table is then NULL and the old block has been freed)
+ *
+ * NOTE(review): the table grows by a single step of (1 << vlc->bits) entries
+ * per call, which assumes size <= (1 << vlc->bits) -- confirm with callers.
+ */
+static int alloc_table(VLC *vlc, int size, int use_static)
+{
+    int index;
+    index = vlc->table_size;
+    vlc->table_size += size;
+    if (vlc->table_size > vlc->table_allocated) {
+        void *grown;
+        if(use_static)
+            abort(); //cant do anything, init_vlc() is used with too little memory
+        vlc->table_allocated += (1 << vlc->bits);
+        /* Go through a temporary: assigning the result straight to
+         * vlc->table would lose the only pointer to the old block when the
+         * reallocation fails. */
+        grown = av_realloc(vlc->table,
+                           sizeof(VLC_TYPE) * 2 * vlc->table_allocated);
+        if (!grown) {
+            /* Keep the previous failure state (table == NULL) without
+             * leaking the old block. */
+            av_free(vlc->table);
+            vlc->table = NULL;
+            return -1;
+        }
+        vlc->table = grown;
+    }
+    return index;
+}
+/**
+ * Builds a 32-bit word from the four bytes of \p x, each mapped through
+ * av_reverse[] and placed in the opposite byte position (lowest byte ends up
+ * highest). Assuming av_reverse[b] is the bit-reversed byte b -- the table
+ * is defined elsewhere -- the result is \p x with its 32 bits reversed.
+ *
+ * Each table entry is cast to uint32_t before shifting: an 8-bit entry would
+ * otherwise be promoted to int, and shifting a value >= 0x80 left by 24 is
+ * undefined for a 32-bit int.
+ */
+static av_always_inline uint32_t bitswap_32(uint32_t x) {
+    return (uint32_t)av_reverse[x&0xFF]<<24
+         | (uint32_t)av_reverse[(x>>8)&0xFF]<<16
+         | (uint32_t)av_reverse[(x>>16)&0xFF]<<8
+         | (uint32_t)av_reverse[x>>24];
+}
+/** Description of one variable-length code, as consumed by build_table(). */
+typedef struct {
+    uint8_t bits;    /**< code length in bits; build_table() shortens it for codes that go into a subtable */
+    uint16_t symbol; /**< value stored in the table entry for this code */
+    /** codeword, with the first bit-to-be-read in the msb
+     * (even if intended for a little-endian bitstream reader) */
+    uint32_t code;
+} VLCcode;
+/**
+ * qsort()-style comparator for VLCcode entries, ordering by codeword.
+ * The lowest bit of each code is dropped before the subtraction, so two
+ * codes that differ only in that bit compare equal.
+ */
+static int compare_vlcspec(const void *a, const void *b)
+{
+    const VLCcode *lhs = a;
+    const VLCcode *rhs = b;
+    const uint32_t lhs_key = lhs->code >> 1;
+    const uint32_t rhs_key = rhs->code >> 1;
+    return lhs_key - rhs_key;
+}
+/**
+ * Build VLC decoding tables suitable for use with get_vlc().
+ *
+ * @param vlc            the context to be initted
+ *
+ * @param table_nb_bits  max length of vlc codes to store directly in this table
+ *                       (Longer codes are delegated to subtables.)
+ *
+ * @param nb_codes       number of elements in codes[]
+ *
+ * @param codes          descriptions of the vlc codes
+ *                       These must be ordered such that codes going into the same subtable are contiguous.
+ *                       Sorting by VLCcode.code is sufficient, though not necessary.
+ *                       The array is modified: for codes delegated to a
+ *                       subtable, bits and code are rewritten with the
+ *                       already-consumed prefix removed.
+ *
+ * @param flags          INIT_VLC_USE_NEW_STATIC is forwarded to alloc_table();
+ *                       INIT_VLC_LE selects bit-reversed table indexing.
+ *
+ * @return index of the table built by this call within vlc->table, or -1 on
+ *         allocation failure or when two codes map to the same entry.
+ */
+static int build_table(VLC *vlc, int table_nb_bits, int nb_codes,
+                       VLCcode *codes, int flags)
+{
+    int table_size, table_index, index, code_prefix, symbol, subtable_bits;
+    int i, j, k, n, nb, inc;
+    uint32_t code;
+    VLC_TYPE (*table)[2];
+    table_size = 1 << table_nb_bits;
+    table_index = alloc_table(vlc, table_size, flags & INIT_VLC_USE_NEW_STATIC);
+#ifdef DEBUG_VLC
+    av_log(NULL,AV_LOG_DEBUG,"new table index=%d size=%d\n",
+           table_index, table_size);
+#endif
+    if (table_index < 0)
+        return -1;
+    table = &vlc->table[table_index];
+    /* mark every entry as unused: 0 bits, code -1 */
+    for (i = 0; i < table_size; i++) {
+        table[i][1] = 0; //bits
+        table[i][0] = -1; //codes
+    }
+    /* first pass: map codes and compute auxiliary table sizes */
+    for (i = 0; i < nb_codes; i++) {
+        n = codes[i].bits;
+        code = codes[i].code;
+        symbol = codes[i].symbol;
+#if defined(DEBUG_VLC) && 0
+        av_log(NULL,AV_LOG_DEBUG,"i=%d n=%d code=0x%x\n", i, n, code);
+#endif
+        if (n <= table_nb_bits) {
+            /* no need to add another table */
+            /* the code fills nb entries: every index whose leading n bits
+               match it */
+            j = code >> (32 - table_nb_bits);
+            nb = 1 << (table_nb_bits - n);
+            inc = 1;
+            if (flags & INIT_VLC_LE) {
+                /* bit-reversed indexing: the matching entries are 1 << n apart */
+                j = bitswap_32(code);
+                inc = 1 << n;
+            }
+            for (k = 0; k < nb; k++) {
+#ifdef DEBUG_VLC
+                av_log(NULL, AV_LOG_DEBUG, "%4x: code=%d n=%d\n",
+                       j, i, n);
+#endif
+                /* an entry that is already in use means two codes collide */
+                if (table[j][1] /*bits*/ != 0) {
+                    av_log(NULL, AV_LOG_ERROR, "incorrect codes\n");
+                    return -1;
+                }
+                table[j][1] = n; //bits
+                table[j][0] = symbol;
+                j += inc;
+            }
+        } else {
+            /* fill auxiliary table recursively */
+            n -= table_nb_bits;
+            code_prefix = code >> (32 - table_nb_bits);
+            subtable_bits = n;
+            /* strip the prefix consumed by this table from the code */
+            codes[i].bits = n;
+            codes[i].code = code << table_nb_bits;
+            /* collect the following codes that share the same prefix */
+            for (k = i+1; k < nb_codes; k++) {
+                n = codes[k].bits - table_nb_bits;
+                if (n <= 0)
+                    break;
+                code = codes[k].code;
+                if (code >> (32 - table_nb_bits) != code_prefix)
+                    break;
+                codes[k].bits = n;
+                codes[k].code = code << table_nb_bits;
+                subtable_bits = FFMAX(subtable_bits, n);
+            }
+            /* a subtable never uses more bits than its parent table */
+            subtable_bits = FFMIN(subtable_bits, table_nb_bits);
+            j = (flags & INIT_VLC_LE) ? bitswap_32(code_prefix) >> (32 - table_nb_bits) : code_prefix;
+            /* a negative bit count marks the entry as a link to a subtable */
+            table[j][1] = -subtable_bits;
+#ifdef DEBUG_VLC
+            av_log(NULL,AV_LOG_DEBUG,"%4x: n=%d (subtable)\n",
+                   j, codes[i].bits + table_nb_bits);
+#endif
+            index = build_table(vlc, subtable_bits, k-i, codes+i, flags);
+            if (index < 0)
+                return -1;
+            /* note: realloc has been done, so reload tables */
+            table = &vlc->table[table_index];
+            table[j][0] = index; //code
+            /* resume after the codes handled by the subtable */
+            i = k-1;
+        }
+    }
+    return table_index;
+}
+/* Build VLC decoding tables suitable for use with get_vlc().
+   'nb_bits' sets the decoding table size (2^nb_bits entries). The
+   bigger it is, the faster the decoding is. But it should not be too
+   big, to save memory and L1 cache. '9' is a good compromise.
+   'nb_codes' : number of VLC codes
+   'bits' : table which gives the size (in bits) of each VLC code.
+   'codes' : table which gives the bit pattern of each VLC code.
+   'symbols' : table which gives the values to be returned from get_vlc().
+   'xxx_wrap' : gives the number of bytes between each entry of the
+   'bits' or 'codes' tables.
+   'xxx_size' : gives the number of bytes of each entry of the 'bits'
+   or 'codes' tables.
+   'wrap' and 'size' allow the use of any memory configuration and type
+   (byte/word/long) to store the 'bits', 'codes', and 'symbols' tables.
+   'use_static' should be set to 1 for tables which should be freed
+   with av_free_static(), 0 if free_vlc() will be used.
+*/
+int init_vlc_sparse(VLC *vlc, int nb_bits, int nb_codes,
+             const void *bits, int bits_wrap, int bits_size,
+             const void *codes, int codes_wrap, int codes_size,
+             const void *symbols, int symbols_wrap, int symbols_size,
+             int flags)
+{   /* Collects the input codes into a temporary VLCcode array and hands
+     * them to build_table().
+     * Returns 0 on success, or immediately when INIT_VLC_USE_NEW_STATIC is
+     * set and the table is already complete; returns -1 when build_table()
+     * fails. Calls abort() when INIT_VLC_USE_NEW_STATIC is set and the
+     * table is only partially initialized.
+     * NOTE(review): on failure av_freep(&vlc->table) runs even with
+     * INIT_VLC_USE_NEW_STATIC; if that table is caller-provided storage
+     * this would free memory the function does not own - confirm against
+     * alloc_table().
+     */
+    VLCcode buf[nb_codes]; /* variable-length array on the stack: one slot per input code */
+    int i, j;
+    vlc->bits = nb_bits;
+    if(flags & INIT_VLC_USE_NEW_STATIC){
+        if(vlc->table_size && vlc->table_size == vlc->table_allocated){
+            return 0; /* table was already built completely: nothing to do */
+        }else if(vlc->table_size){
+            abort(); // fatal error, we are called on a partially initialized table
+        }
+    }else {
+        vlc->table = NULL; /* start from an empty table; build_table() allocates through alloc_table() */
+        vlc->table_allocated = 0;
+        vlc->table_size = 0;
+    }
+#ifdef DEBUG_VLC
+    av_log(NULL,AV_LOG_DEBUG,"build table nb_codes=%d\n", nb_codes);
+#endif
+    assert(symbols_size <= 2 || !symbols);
+    j = 0; /* next free slot in buf; advanced by COPY() for every code it keeps */
+#define COPY(condition) /* append to buf every input code whose bit length satisfies 'condition' */\
+    for (i = 0; i < nb_codes; i++) {\
+        GET_DATA(buf[j].bits, bits, i, bits_wrap, bits_size);\
+        if (!(condition))\
+            continue;\
+        GET_DATA(buf[j].code, codes, i, codes_wrap, codes_size);\
+        if (flags & INIT_VLC_LE)\
+            buf[j].code = bitswap_32(buf[j].code);\
+        else\
+            buf[j].code <<= 32 - buf[j].bits;\
+        if (symbols)\
+            GET_DATA(buf[j].symbol, symbols, i, symbols_wrap, symbols_size)\
+        else\
+            buf[j].symbol = i;\
+        j++;\
+    }
+    COPY(buf[j].bits > nb_bits); /* first the codes longer than nb_bits (they need subtables) ... */
+    // qsort is the slowest part of init_vlc, and could probably be improved or avoided
+    qsort(buf, j, sizeof(VLCcode), compare_vlcspec); /* ... which are the only ones that get sorted */
+    COPY(buf[j].bits && buf[j].bits <= nb_bits); /* then the codes that fit the first table; zero-length codes are dropped */
+    nb_codes = j; /* number of codes actually kept */
+    if (build_table(vlc, nb_bits, nb_codes, buf, flags) < 0) {
+        av_freep(&vlc->table);
+        return -1;
+    }
+    if((flags & INIT_VLC_USE_NEW_STATIC) && vlc->table_size != vlc->table_allocated)
+        av_log(NULL, AV_LOG_ERROR, "needed %d had %d\n", vlc->table_size, vlc->table_allocated);
+    return 0;
+}
+void free_vlc(VLC *vlc)
+{   /* Release the lookup table of 'vlc' by passing &vlc->table to av_freep(). */
+    av_freep(&vlc->table);
+}
diff --git a/apps/codecs/libwmapro/dsputil.c b/apps/codecs/libwmapro/dsputil.c
new file mode 100644
index 0000000000..b09311925a
--- /dev/null
+++ b/apps/codecs/libwmapro/dsputil.c
@@ -0,0 +1,4572 @@
+/*
+ * DSP utils
+ * Copyright (c) 2000, 2001 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/**
+ * @file libavcodec/dsputil.c
+ * DSP utils
+ */
+#include "avcodec.h"
+#include "dsputil.h"
+//#include "simple_idct.h"
+//#include "faandct.h"
+//#include "faanidct.h"
+#include "mathops.h"
+//#include "mpegvideo.h"
+//#include "config.h"
+//#include "lpc.h"
+//#include "ac3dec.h"
+//#include "vorbis.h"
+//#include "png.h"
+#if 0
+uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, }; /* zero here; used below as ff_cropTbl + MAX_NEG_CROP - presumably filled at init time elsewhere */
+uint32_t ff_squareTbl[512] = {0, }; /* zero here; used below as ff_squareTbl + 256, indexed with pixel values and pixel differences */
+// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
+#define pb_7f (~0UL/255 * 0x7f)
+#define pb_80 (~0UL/255 * 0x80) /* same construction with 0x80 in every byte */
+const uint8_t ff_zigzag_direct[64] = { /* 8x8 zigzag scan: entry i is the raster position (row * 8 + column) visited at scan step i */
+    0,   1,  8, 16,  9,  2,  3, 10,
+    17, 24, 32, 25, 18, 11,  4,  5,
+    12, 19, 26, 33, 40, 48, 41, 34,
+    27, 20, 13,  6,  7, 14, 21, 28,
+    35, 42, 49, 56, 57, 50, 43, 36,
+    29, 22, 15, 23, 30, 37, 44, 51,
+    58, 59, 52, 45, 38, 31, 39, 46,
+    53, 60, 61, 54, 47, 55, 62, 63
+};
+/* Specific zigzag scan for the 248 IDCT. NOTE: unlike the
+   specification, the fields are interleaved here. Like the other scan
+   tables, every value 0..63 appears exactly once. */
+const uint8_t ff_zigzag248_direct[64] = {
+     0,  8,  1,  9, 16, 24,  2, 10,
+    17, 25, 32, 40, 48, 56, 33, 41,
+    18, 26,  3, 11,  4, 12, 19, 27,
+    34, 42, 49, 57, 50, 58, 35, 43,
+    20, 28,  5, 13,  6, 14, 21, 29,
+    36, 44, 51, 59, 52, 60, 37, 45,
+    22, 30,  7, 15, 23, 31, 38, 46,
+    53, 61, 54, 62, 39, 47, 55, 63,
+};
+/* inverse of zigzag_direct (not permuted), plus 1, for the MMX quantizer;
+   16-byte aligned and not initialized at this point */
+DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64];
+const uint8_t ff_alternate_horizontal_scan[64] = { /* alternate scan order; entry i is the raster position (row * 8 + column) visited at scan step i */
+    0,  1,   2,  3,  8,  9, 16, 17,
+    10, 11,  4,  5,  6,  7, 15, 14,
+    13, 12, 19, 18, 24, 25, 32, 33,
+    26, 27, 20, 21, 22, 23, 28, 29,
+    30, 31, 34, 35, 40, 41, 48, 49,
+    42, 43, 36, 37, 38, 39, 44, 45,
+    46, 47, 50, 51, 56, 57, 58, 59,
+    52, 53, 54, 55, 60, 61, 62, 63,
+};
+const uint8_t ff_alternate_vertical_scan[64] = { /* alternate scan order that walks down the columns first; entry i is the raster position visited at scan step i */
+    0,  8,  16, 24,  1,  9,  2, 10,
+    17, 25, 32, 40, 48, 56, 57, 49,
+    41, 33, 26, 18,  3, 11,  4, 12,
+    19, 27, 34, 42, 50, 58, 35, 43,
+    51, 59, 20, 28,  5, 13,  6, 14,
+    21, 29, 36, 44, 52, 60, 37, 45,
+    53, 61, 22, 30,  7, 15, 23, 31,
+    38, 46, 54, 62, 39, 47, 55, 63,
+};
+/* a*inverse[b]>>32 == a/b for all 0<=a<=16909558 && 2<=b<=256
+ * for a>16909558, is an overestimate by less than 1 part in 1<<24
+ * Entry b (b >= 2) holds 2^32 / b rounded up; entry 1 is 2^32 - 1 and
+ * entry 0 is 0. */
+const uint32_t ff_inverse[257]={
+         0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757,
+ 536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154,
+ 268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709,
+ 178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333,
+ 134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367,
+ 107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283,
+  89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315,
+  76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085,
+  67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498,
+  59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675,
+  53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441,
+  48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183,
+  44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712,
+  41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400,
+  38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163,
+  35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641,
+  33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573,
+  31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737,
+  29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493,
+  28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373,
+  26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368,
+  25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671,
+  24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767,
+  23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740,
+  22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751,
+  21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635,
+  20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593,
+  19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944,
+  19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933,
+  18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575,
+  17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532,
+  17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
+  16777216
+};
+/* Input permutation for the simple_idct_mmx: 64 entries, each a
+   coefficient position in the range 0x00..0x3F */
+static const uint8_t simple_mmx_permutation[64]={
+        0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
+        0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
+        0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
+        0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
+        0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
+        0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
+        0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
+        0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
+};
+static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7}; /* permutation of the indices 0..7; presumably the within-row order expected by the SSE2 idct - confirm at the point of use */
+/**
+ * Fill a ScanTable from a source scan order and an IDCT permutation.
+ * @param permutation   maps a raster position to its permuted position
+ * @param st            table to fill (scantable, permutated, raster_end and,
+ *                      when ARCH_PPC is set, inverse)
+ * @param src_scantable 64-entry scan order; the pointer itself is stored in st
+ */
+void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable)
+{
+    int pos;
+    int highest = -1;
+    st->scantable = src_scantable;
+    /* permuted position of every scan step */
+    for (pos = 0; pos < 64; pos++) {
+        const int raster = src_scantable[pos];
+        st->permutated[pos] = permutation[raster];
+#if ARCH_PPC
+        st->inverse[raster] = pos;
+#endif
+    }
+    /* running maximum of the permuted positions seen up to each scan step */
+    for (pos = 0; pos < 64; pos++) {
+        const int mapped = st->permutated[pos];
+        if (highest < mapped)
+            highest = mapped;
+        st->raster_end[pos] = highest;
+    }
+}
+/**
+ * Sum of all pixel values of a 16x16 block.
+ * @param pix       top-left pixel of the block
+ * @param line_size distance in bytes between two rows
+ * @return the sum of the 256 pixels
+ */
+static int pix_sum_c(uint8_t * pix, int line_size)
+{
+    int total = 0;
+    int row, col;
+    for (row = 0; row < 16; row++) {
+        for (col = 0; col < 16; col++)
+            total += pix[col];
+        pix += line_size;
+    }
+    return total;
+}
+/**
+ * Sum of the ff_squareTbl entries of all pixels of a 16x16 block
+ * (the table is indexed through ff_squareTbl + 256).
+ * Pixels are read one byte at a time, so 'pix' needs no particular
+ * alignment and no object is accessed through a pointer of another type.
+ * @param pix       top-left pixel of the block
+ * @param line_size distance in bytes between two rows
+ * @return the accumulated table values
+ */
+static int pix_norm1_c(uint8_t * pix, int line_size)
+{
+    const uint32_t *sq = ff_squareTbl + 256;
+    int s = 0;
+    int i, j;
+    for (i = 0; i < 16; i++) {
+        for (j = 0; j < 16; j++)
+            s += sq[pix[j]];
+        pix += line_size;
+    }
+    return s;
+}
+/**
+ * Byte-swap 'w' 32-bit words with bswap_32().
+ * @param dst destination words
+ * @param src source words
+ * @param w   number of words to convert
+ */
+static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
+    int n;
+    for (n = 0; n < w; n++)
+        dst[n] = bswap_32(src[n]);
+}
+/**
+ * Accumulate ff_squareTbl entries of the pixel differences of two blocks
+ * that are 4 pixels wide and h rows high.
+ * @param v         unused
+ * @param pix1      first block
+ * @param pix2      second block
+ * @param line_size distance in bytes between two rows (both blocks)
+ * @param h         number of rows
+ */
+static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
+{
+    uint32_t *sq = ff_squareTbl + 256;
+    int acc = 0;
+    int row, col;
+    for (row = 0; row < h; row++) {
+        for (col = 0; col < 4; col++)
+            acc += sq[pix1[col] - pix2[col]];
+        pix1 += line_size;
+        pix2 += line_size;
+    }
+    return acc;
+}
+/**
+ * Accumulate ff_squareTbl entries of the pixel differences of two blocks
+ * that are 8 pixels wide and h rows high.
+ * @param v         unused
+ * @param pix1      first block
+ * @param pix2      second block
+ * @param line_size distance in bytes between two rows (both blocks)
+ * @param h         number of rows
+ */
+static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
+{
+    uint32_t *sq = ff_squareTbl + 256;
+    int acc = 0;
+    int row, col;
+    for (row = 0; row < h; row++) {
+        for (col = 0; col < 8; col++)
+            acc += sq[pix1[col] - pix2[col]];
+        pix1 += line_size;
+        pix2 += line_size;
+    }
+    return acc;
+}
+/**
+ * Accumulate ff_squareTbl entries of the pixel differences of two blocks
+ * that are 16 pixels wide and h rows high.
+ * @param v         unused
+ * @param pix1      first block
+ * @param pix2      second block
+ * @param line_size distance in bytes between two rows (both blocks)
+ * @param h         number of rows
+ */
+static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    uint32_t *sq = ff_squareTbl + 256;
+    int acc = 0;
+    int row, col;
+    for (row = 0; row < h; row++) {
+        for (col = 0; col < 16; col++)
+            acc += sq[pix1[col] - pix2[col]];
+        pix1 += line_size;
+        pix2 += line_size;
+    }
+    return acc;
+}
+/* Replicate the outermost pixels of a width x height image into a border
+ * of 'w' pixels on every side. 'buf' points at the top-left image pixel and
+ * 'wrap' is the distance in bytes between two rows; the border is written
+ * outside the image, so the buffer must provide that room. */
+//FIXME check that this is ok for mpeg4 interlaced
+static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
+{
+    uint8_t *const bottom = buf + (height - 1) * wrap;
+    uint8_t *row;
+    int n;
+    /* rows above the first and below the last image row */
+    for (n = 1; n <= w; n++) {
+        memcpy(buf - n * wrap, buf, width);
+        memcpy(bottom + n * wrap, bottom, width);
+    }
+    /* columns left and right of every image row */
+    row = buf;
+    for (n = 0; n < height; n++) {
+        memset(row - w, row[0], w);
+        memset(row + width, row[width-1], w);
+        row += wrap;
+    }
+    /* the four corner areas */
+    for (n = 1; n <= w; n++) {
+        memset(buf - n * wrap - w, buf[0], w); /* top left */
+        memset(buf - n * wrap + width, buf[width-1], w); /* top right */
+        memset(bottom + n * wrap - w, bottom[0], w); /* bottom left */
+        memset(bottom + n * wrap + width, bottom[width-1], w); /* bottom right */
+    }
+}
+/**
+ * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
+ * @param buf destination buffer
+ * @param src source buffer
+ * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
+ * @param block_w width of block
+ * @param block_h height of block
+ * @param src_x x coordinate of the top left sample of the block in the source buffer
+ * @param src_y y coordinate of the top left sample of the block in the source buffer
+ * @param w width of the source buffer
+ * @param h height of the source buffer
+ */
+void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
+                                    int src_x, int src_y, int w, int h){
+    int col, row;
+    int top, left, bottom, right;
+    /* pull a block that lies completely outside back to the nearest edge row */
+    if (src_y >= h) {
+        src  += (h-1-src_y)*linesize;
+        src_y = h-1;
+    } else if (src_y <= -block_h) {
+        src  += (1-block_h-src_y)*linesize;
+        src_y = 1-block_h;
+    }
+    /* same for the columns */
+    if (src_x >= w) {
+        src  += (w-1-src_x);
+        src_x = w-1;
+    } else if (src_x <= -block_w) {
+        src  += (1-block_w-src_x);
+        src_x = 1-block_w;
+    }
+    /* part of the block that is covered by real source samples */
+    top    = FFMAX(0, -src_y);
+    left   = FFMAX(0, -src_x);
+    bottom = FFMIN(block_h, h-src_y);
+    right  = FFMIN(block_w, w-src_x);
+    /* copy the existing part */
+    for (row = top; row < bottom; row++) {
+        const int off = row*linesize;
+        for (col = left; col < right; col++)
+            buf[off + col] = src[off + col];
+    }
+    /* rows above: repeat the first copied row */
+    for (row = 0; row < top; row++) {
+        for (col = left; col < right; col++)
+            buf[col + row*linesize] = buf[col + top*linesize];
+    }
+    /* rows below: repeat the last copied row */
+    for (row = bottom; row < block_h; row++) {
+        for (col = left; col < right; col++)
+            buf[col + row*linesize] = buf[col + (bottom-1)*linesize];
+    }
+    /* columns left and right of the copied part, for every row of the block */
+    for (row = 0; row < block_h; row++) {
+        const int off = row*linesize;
+        for (col = 0; col < left; col++)
+            buf[col + off] = buf[left + off];
+        for (col = right; col < block_w; col++)
+            buf[col + off] = buf[right - 1 + off];
+    }
+}
+/**
+ * Load an 8x8 block of pixels into a coefficient block.
+ * @param block     destination, 64 values stored 8 per row
+ * @param pixels    top-left source pixel
+ * @param line_size distance in bytes between two source rows
+ */
+static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
+{
+    int row, col;
+    for (row = 0; row < 8; row++) {
+        for (col = 0; col < 8; col++)
+            block[col] = pixels[col];
+        pixels += line_size;
+        block  += 8;
+    }
+}
+/**
+ * Store the difference s1 - s2 of two 8x8 pixel blocks.
+ * @param block  destination, 64 values stored 8 per row
+ * @param s1     top-left pixel of the first block
+ * @param s2     top-left pixel of the second block
+ * @param stride distance in bytes between two rows (both sources)
+ */
+static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
+                          const uint8_t *s2, int stride){
+    int row, col;
+    for (row = 0; row < 8; row++) {
+        for (col = 0; col < 8; col++)
+            block[col] = s1[col] - s2[col];
+        s1    += stride;
+        s2    += stride;
+        block += 8;
+    }
+}
+/**
+ * Write an 8x8 coefficient block as pixels, mapping every value through
+ * the ff_cropTbl lookup table (offset by MAX_NEG_CROP).
+ * @param block     64 source values stored 8 per row
+ * @param pixels    top-left destination pixel
+ * @param line_size distance in bytes between two destination rows
+ */
+static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
+                                 int line_size)
+{
+    uint8_t *crop = ff_cropTbl + MAX_NEG_CROP;
+    int row, col;
+    for (row = 0; row < 8; row++) {
+        for (col = 0; col < 8; col++)
+            pixels[col] = crop[block[col]];
+        pixels += line_size;
+        block  += 8;
+    }
+}
+/**
+ * Write the top-left 4x4 part of a coefficient block as pixels, mapping
+ * every value through the ff_cropTbl lookup table (offset by MAX_NEG_CROP).
+ * @param block     source values; rows are 8 values apart
+ * @param pixels    top-left destination pixel
+ * @param line_size distance in bytes between two destination rows
+ */
+static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
+                                 int line_size)
+{
+    uint8_t *crop = ff_cropTbl + MAX_NEG_CROP;
+    int row, col;
+    for (row = 0; row < 4; row++) {
+        for (col = 0; col < 4; col++)
+            pixels[col] = crop[block[col]];
+        pixels += line_size;
+        block  += 8;
+    }
+}
+/**
+ * Write the top-left 2x2 part of a coefficient block as pixels, mapping
+ * every value through the ff_cropTbl lookup table (offset by MAX_NEG_CROP).
+ * @param block     source values; rows are 8 values apart
+ * @param pixels    top-left destination pixel
+ * @param line_size distance in bytes between two destination rows
+ */
+static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
+                                 int line_size)
+{
+    uint8_t *crop = ff_cropTbl + MAX_NEG_CROP;
+    int row, col;
+    for (row = 0; row < 2; row++) {
+        for (col = 0; col < 2; col++)
+            pixels[col] = crop[block[col]];
+        pixels += line_size;
+        block  += 8;
+    }
+}
+/**
+ * Write an 8x8 block of signed values as pixels: values below -128 become
+ * 0, values above 127 become 255, everything else is stored as value + 128.
+ * @param block     64 source values stored 8 per row
+ * @param pixels    top-left destination pixel
+ * @param line_size distance in bytes between two destination rows
+ */
+static void put_signed_pixels_clamped_c(const DCTELEM *block,
+                                        uint8_t *restrict pixels,
+                                        int line_size)
+{
+    int row, col;
+    for (row = 0; row < 8; row++) {
+        for (col = 0; col < 8; col++) {
+            if (block[col] < -128)
+                pixels[col] = 0;
+            else if (block[col] > 127)
+                pixels[col] = 255;
+            else
+                pixels[col] = (uint8_t)(block[col] + 128);
+        }
+        block  += 8;
+        pixels += line_size;
+    }
+}
+/**
+ * Write an 8x8 coefficient block as pixels without any range check; each
+ * value is simply converted to uint8_t by the assignment.
+ * @param block     64 source values stored 8 per row
+ * @param pixels    top-left destination pixel
+ * @param line_size distance in bytes between two destination rows
+ */
+static void put_pixels_nonclamped_c(const DCTELEM *block, uint8_t *restrict pixels,
+                                    int line_size)
+{
+    int row, col;
+    for (row = 0; row < 8; row++) {
+        for (col = 0; col < 8; col++)
+            pixels[col] = block[col];
+        pixels += line_size;
+        block  += 8;
+    }
+}
+/**
+ * Add an 8x8 coefficient block to the pixels already present, mapping
+ * every sum through the ff_cropTbl lookup table (offset by MAX_NEG_CROP).
+ * @param block     64 values to add, stored 8 per row
+ * @param pixels    top-left pixel, read and written
+ * @param line_size distance in bytes between two pixel rows
+ */
+static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
+                          int line_size)
+{
+    uint8_t *crop = ff_cropTbl + MAX_NEG_CROP;
+    int row, col;
+    for (row = 0; row < 8; row++) {
+        for (col = 0; col < 8; col++)
+            pixels[col] = crop[pixels[col] + block[col]];
+        pixels += line_size;
+        block  += 8;
+    }
+}
+/**
+ * Add the top-left 4x4 part of a coefficient block to the pixels already
+ * present, mapping every sum through the ff_cropTbl lookup table
+ * (offset by MAX_NEG_CROP).
+ * @param block     values to add; rows are 8 values apart
+ * @param pixels    top-left pixel, read and written
+ * @param line_size distance in bytes between two pixel rows
+ */
+static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
+                          int line_size)
+{
+    uint8_t *crop = ff_cropTbl + MAX_NEG_CROP;
+    int row, col;
+    for (row = 0; row < 4; row++) {
+        for (col = 0; col < 4; col++)
+            pixels[col] = crop[pixels[col] + block[col]];
+        pixels += line_size;
+        block  += 8;
+    }
+}
+/**
+ * Add the top-left 2x2 part of a coefficient block to the pixels already
+ * present, mapping every sum through the ff_cropTbl lookup table
+ * (offset by MAX_NEG_CROP).
+ * @param block     values to add; rows are 8 values apart
+ * @param pixels    top-left pixel, read and written
+ * @param line_size distance in bytes between two pixel rows
+ */
+static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
+                          int line_size)
+{
+    uint8_t *crop = ff_cropTbl + MAX_NEG_CROP;
+    int row, col;
+    for (row = 0; row < 2; row++) {
+        for (col = 0; col < 2; col++)
+            pixels[col] = crop[pixels[col] + block[col]];
+        pixels += line_size;
+        block  += 8;
+    }
+}
+/**
+ * Add an 8x8 coefficient block to the pixels already present, without any
+ * range check (the sums wrap in uint8_t).
+ * @param pixels    top-left pixel, read and written
+ * @param block     64 values to add, stored 8 per row
+ * @param line_size distance in bytes between two pixel rows
+ */
+static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
+{
+    int row, col;
+    for (row = 0; row < 8; row++) {
+        for (col = 0; col < 8; col++)
+            pixels[col] += block[col];
+        pixels += line_size;
+        block  += 8;
+    }
+}
+/**
+ * Add a 4x4 coefficient block to the pixels already present, without any
+ * range check (the sums wrap in uint8_t).
+ * @param pixels    top-left pixel, read and written
+ * @param block     16 values to add, stored 4 per row
+ * @param line_size distance in bytes between two pixel rows
+ */
+static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
+{
+    int row, col;
+    for (row = 0; row < 4; row++) {
+        for (col = 0; col < 4; col++)
+            pixels[col] += block[col];
+        pixels += line_size;
+        block  += 4;
+    }
+}
+/**
+ * Sum of the absolute values (FFABS) of the 64 entries of a block.
+ * @param block 64 coefficients
+ * @return the accumulated sum
+ */
+static int sum_abs_dctelem_c(DCTELEM *block)
+{
+    int total = 0;
+    int idx = 0;
+    while (idx < 64) {
+        total += FFABS(block[idx]);
+        idx++;
+    }
+    return total;
+}
+/**
+ * Set the first 16 bytes of each of h rows to 'value'.
+ * @param block     first byte of the first row
+ * @param value     byte to store
+ * @param line_size distance in bytes between two rows
+ * @param h         number of rows
+ */
+static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
+{
+    int rows_left = h;
+    while (rows_left > 0) {
+        memset(block, value, 16);
+        block += line_size;
+        rows_left--;
+    }
+}
+/**
+ * Set the first 8 bytes of each of h rows to 'value'.
+ * @param block     first byte of the first row
+ * @param value     byte to store
+ * @param line_size distance in bytes between two rows
+ * @param h         number of rows
+ */
+static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
+{
+    int rows_left = h;
+    while (rows_left > 0) {
+        memset(block, value, 8);
+        block += line_size;
+        rows_left--;
+    }
+}
+/**
+ * Enlarge an 8x8 block to 16x16: every source byte is written as a 2x2
+ * square. Each source row fills two destination rows through 16-bit
+ * stores, and the 16-bit pointers advance by 'linesize' elements, i.e. by
+ * two destination rows.
+ * @param src      64 source bytes stored 8 per row
+ * @param dst      top-left destination byte
+ * @param linesize distance in bytes between two destination rows
+ */
+static void scale_block_c(const uint8_t src[64]/*align 8*/, uint8_t *dst/*align 8*/, int linesize)
+{
+    uint16_t *upper = (uint16_t *) dst;
+    uint16_t *lower = (uint16_t *)(dst + linesize);
+    int row, col;
+    for (row = 0; row < 8; row++) {
+        for (col = 0; col < 8; col++) {
+            /* the same byte in both halves of the 16-bit word */
+            const uint16_t pair = src[col] * 0x0101;
+            lower[col] = pair;
+            upper[col] = pair;
+        }
+        src   += 8;
+        upper += linesize;
+        lower += linesize;
+    }
+}
+#if 0 /* Disabled variant of PIXOP2 that works on 8 pixels at a time through
+       * uint64_t. For a name prefix OPNAME and a store macro OP(dst, value)
+       * it defines copy/average helpers for the plain, x2, y2 and xy2 cases,
+       * each with a "no_rnd" twin, plus 16-pixel-wide wrappers built with
+       * CALL_2X_PIXELS.
+       * NOTE(review): the first helper is named OPNAME ## _pixels while
+       * CALL_2X_PIXELS below refers to OPNAME ## _pixels_c - confirm before
+       * enabling this branch.
+       */
+#define PIXOP2(OPNAME, OP) \
+static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+    int i;\
+    for(i=0; i<h; i++){\
+        OP(*((uint64_t*)block), AV_RN64(pixels));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+\
+static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+    int i;\
+    for(i=0; i<h; i++){\
+        const uint64_t a= AV_RN64(pixels  );\
+        const uint64_t b= AV_RN64(pixels+1);\
+        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+\
+static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+    int i;\
+    for(i=0; i<h; i++){\
+        const uint64_t a= AV_RN64(pixels  );\
+        const uint64_t b= AV_RN64(pixels+1);\
+        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+\
+static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+    int i;\
+    for(i=0; i<h; i++){\
+        const uint64_t a= AV_RN64(pixels          );\
+        const uint64_t b= AV_RN64(pixels+line_size);\
+        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+\
+static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+    int i;\
+    for(i=0; i<h; i++){\
+        const uint64_t a= AV_RN64(pixels          );\
+        const uint64_t b= AV_RN64(pixels+line_size);\
+        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+\
+static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+        int i;\
+        const uint64_t a= AV_RN64(pixels  );\
+        const uint64_t b= AV_RN64(pixels+1);\
+        uint64_t l0=  (a&0x0303030303030303ULL)\
+                    + (b&0x0303030303030303ULL)\
+                    + 0x0202020202020202ULL;\
+        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+        uint64_t l1,h1;\
+\
+        pixels+=line_size;\
+        for(i=0; i<h; i+=2){\
+            uint64_t a= AV_RN64(pixels  );\
+            uint64_t b= AV_RN64(pixels+1);\
+            l1=  (a&0x0303030303030303ULL)\
+               + (b&0x0303030303030303ULL);\
+            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
+            pixels+=line_size;\
+            block +=line_size;\
+            a= AV_RN64(pixels  );\
+            b= AV_RN64(pixels+1);\
+            l0=  (a&0x0303030303030303ULL)\
+               + (b&0x0303030303030303ULL)\
+               + 0x0202020202020202ULL;\
+            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
+            pixels+=line_size;\
+            block +=line_size;\
+        }\
+}\
+\
+static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+        int i;\
+        const uint64_t a= AV_RN64(pixels  );\
+        const uint64_t b= AV_RN64(pixels+1);\
+        uint64_t l0=  (a&0x0303030303030303ULL)\
+                    + (b&0x0303030303030303ULL)\
+                    + 0x0101010101010101ULL;\
+        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+        uint64_t l1,h1;\
+\
+        pixels+=line_size;\
+        for(i=0; i<h; i+=2){\
+            uint64_t a= AV_RN64(pixels  );\
+            uint64_t b= AV_RN64(pixels+1);\
+            l1=  (a&0x0303030303030303ULL)\
+               + (b&0x0303030303030303ULL);\
+            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
+            pixels+=line_size;\
+            block +=line_size;\
+            a= AV_RN64(pixels  );\
+            b= AV_RN64(pixels+1);\
+            l0=  (a&0x0303030303030303ULL)\
+               + (b&0x0303030303030303ULL)\
+               + 0x0101010101010101ULL;\
+            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
+            pixels+=line_size;\
+            block +=line_size;\
+        }\
+}\
+\
+CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels_c    , 8)\
+CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
+#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) ) /* per byte: a = (a + b + 1) >> 1, eight bytes at once */
+#else // end of the disabled 64 bit variant; the variant below works on 16/32 bit words
+#define PIXOP2(OPNAME, OP) \
+static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        OP(*((uint16_t*)(block  )), AV_RN16(pixels  ));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+/* Apply OP to one 4-byte word per row, for h rows spaced line_size apart. */\
+static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    int rows_left;\
+    for(rows_left=h; rows_left>0; rows_left--){\
+        OP(*((uint32_t*)block), AV_RN32(pixels));\
+        block  += line_size;\
+        pixels += line_size;\
+    }\
+}\
+/* Apply OP to two consecutive 4-byte words (8 bytes) per row, for h rows. */\
+static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    int rows_left;\
+    for(rows_left=h; rows_left>0; rows_left--){\
+        uint32_t *out = (uint32_t*)block;\
+        OP(out[0], AV_RN32(pixels  ));\
+        OP(out[1], AV_RN32(pixels+4));\
+        block  += line_size;\
+        pixels += line_size;\
+    }\
+}\
+/* The no_rnd 8-wide variant forwards unchanged to OPNAME_pixels8_c. */\
+static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _pixels8_c(block, pixels, line_size, h);\
+}\
+\
+/* For each of h rows, combine 8 bytes of src1 and src2 with no_rnd_avg32() */\
+/* (defined outside this chunk) and hand the result to OP on dst. Each buffer has its own stride. */\
+static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+                                                int src_stride1, int src_stride2, int h){\
+    int row, off;\
+    for(row=0; row<h; row++){\
+        const uint8_t *in1 = src1 + row*src_stride1;\
+        const uint8_t *in2 = src2 + row*src_stride2;\
+        uint8_t *out = dst + row*dst_stride;\
+        /* two 32-bit words per row, at byte offsets 0 and 4 */\
+        for(off=0; off<8; off+=4){\
+            const uint32_t lhs = AV_RN32(in1+off);\
+            const uint32_t rhs = AV_RN32(in2+off);\
+            OP(*((uint32_t*)(out+off)), no_rnd_avg32(lhs, rhs));\
+        }\
+    }\
+}\
+\
+/* For each of h rows, combine 8 bytes of src1 and src2 with rnd_avg32() */\
+/* (defined outside this chunk) and hand the result to OP on dst. Each buffer has its own stride. */\
+static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+                                                int src_stride1, int src_stride2, int h){\
+    int row, off;\
+    for(row=0; row<h; row++){\
+        const uint8_t *in1 = src1 + row*src_stride1;\
+        const uint8_t *in2 = src2 + row*src_stride2;\
+        uint8_t *out = dst + row*dst_stride;\
+        /* two 32-bit words per row, at byte offsets 0 and 4 */\
+        for(off=0; off<8; off+=4){\
+            const uint32_t lhs = AV_RN32(in1+off);\
+            const uint32_t rhs = AV_RN32(in2+off);\
+            OP(*((uint32_t*)(out+off)), rnd_avg32(lhs, rhs));\
+        }\
+    }\
+}\
+\
+/* 4-byte-wide version: one rnd_avg32() of src1/src2 per row, passed to OP on dst. */\
+static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+                                                int src_stride1, int src_stride2, int h){\
+    int row;\
+    for(row=0; row<h; row++){\
+        const uint32_t lhs = AV_RN32(src1 + row*src_stride1);\
+        const uint32_t rhs = AV_RN32(src2 + row*src_stride2);\
+        OP(*((uint32_t*)(dst + row*dst_stride)), rnd_avg32(lhs, rhs));\
+    }\
+}\
+\
+/* 2-byte-wide version: 16-bit loads are widened to 32 bits, averaged with */\
+/* rnd_avg32() and passed to OP on a 16-bit destination word. */\
+static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+                                                int src_stride1, int src_stride2, int h){\
+    int row;\
+    for(row=0; row<h; row++){\
+        const uint32_t lhs = AV_RN16(src1 + row*src_stride1);\
+        const uint32_t rhs = AV_RN16(src2 + row*src_stride2);\
+        OP(*((uint16_t*)(dst + row*dst_stride)), rnd_avg32(lhs, rhs));\
+    }\
+}\
+\
+/* 16-byte-wide l2: runs the 8-wide version on the left half, then on the right half (+8). */\
+static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+                                                int src_stride1, int src_stride2, int h){\
+    OPNAME ## _pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
+    OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
+}\
+\
+/* 16-byte-wide no_rnd l2: runs the 8-wide no_rnd version on the left half, then on the right half (+8). */\
+static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+                                                int src_stride1, int src_stride2, int h){\
+    OPNAME ## _no_rnd_pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
+    OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
+}\
+\
+/* Horizontal neighbour pair: no_rnd l2 of each pixel with the one to its right (pixels+1). */\
+static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
+}\
+\
+/* Horizontal neighbour pair: l2 of each pixel with the one to its right (pixels+1). */\
+static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
+}\
+\
+/* Vertical neighbour pair: no_rnd l2 of each pixel with the one a row below (pixels+line_size). */\
+static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
+}\
+\
+/* Vertical neighbour pair: l2 of each pixel with the one a row below (pixels+line_size). */\
+static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
+}\
+\
+/* Four-source mean over 8 bytes per row for h rows: each byte passed to OP is (a+b+c+d+2)>>2. */\
+/* Every byte is split into its low 2 bits (l0, l1) and its high 6 bits pre-shifted by 2 (h0, h1), */\
+/* so the four-way sums stay inside their byte lane; 0x02020202 is the per-byte rounding term and */\
+/* the 0x0F0F0F0F mask discards bits that the >>2 pulled in from the byte above. */\
+static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
+                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        uint32_t a, b, c, d, l0, l1, h0, h1;\
+        /* first 4 bytes of the row */\
+        a= AV_RN32(&src1[i*src_stride1]);\
+        b= AV_RN32(&src2[i*src_stride2]);\
+        c= AV_RN32(&src3[i*src_stride3]);\
+        d= AV_RN32(&src4[i*src_stride4]);\
+        l0=  (a&0x03030303UL)\
+           + (b&0x03030303UL)\
+           + 0x02020202UL;\
+        h0= ((a&0xFCFCFCFCUL)>>2)\
+          + ((b&0xFCFCFCFCUL)>>2);\
+        l1=  (c&0x03030303UL)\
+           + (d&0x03030303UL);\
+        h1= ((c&0xFCFCFCFCUL)>>2)\
+          + ((d&0xFCFCFCFCUL)>>2);\
+        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+        /* second 4 bytes of the row */\
+        a= AV_RN32(&src1[i*src_stride1+4]);\
+        b= AV_RN32(&src2[i*src_stride2+4]);\
+        c= AV_RN32(&src3[i*src_stride3+4]);\
+        d= AV_RN32(&src4[i*src_stride4+4]);\
+        l0=  (a&0x03030303UL)\
+           + (b&0x03030303UL)\
+           + 0x02020202UL;\
+        h0= ((a&0xFCFCFCFCUL)>>2)\
+          + ((b&0xFCFCFCFCUL)>>2);\
+        l1=  (c&0x03030303UL)\
+           + (d&0x03030303UL);\
+        h1= ((c&0xFCFCFCFCUL)>>2)\
+          + ((d&0xFCFCFCFCUL)>>2);\
+        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+    }\
+}\
+\
+/* 4-wide horizontal neighbour pair: l2 of pixels with pixels+1. */\
+static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
+}\
+\
+/* 4-wide vertical neighbour pair: l2 of pixels with the row below (pixels+line_size). */\
+static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
+}\
+\
+/* 2-wide horizontal neighbour pair: l2 of pixels with pixels+1. */\
+static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
+}\
+\
+/* 2-wide vertical neighbour pair: l2 of pixels with the row below (pixels+line_size). */\
+static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
+}\
+\
+/* Four-source mean over 8 bytes per row for h rows with the smaller rounding term: */\
+/* each byte passed to OP is (a+b+c+d+1)>>2 (0x01010101 instead of 0x02020202). */\
+/* Bytes are split into low 2 bits (l0, l1) and high 6 bits pre-shifted by 2 (h0, h1) so the sums */\
+/* stay inside their byte lane; the 0x0F0F0F0F mask drops bits shifted in from the byte above. */\
+static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
+                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        uint32_t a, b, c, d, l0, l1, h0, h1;\
+        /* first 4 bytes of the row */\
+        a= AV_RN32(&src1[i*src_stride1]);\
+        b= AV_RN32(&src2[i*src_stride2]);\
+        c= AV_RN32(&src3[i*src_stride3]);\
+        d= AV_RN32(&src4[i*src_stride4]);\
+        l0=  (a&0x03030303UL)\
+           + (b&0x03030303UL)\
+           + 0x01010101UL;\
+        h0= ((a&0xFCFCFCFCUL)>>2)\
+          + ((b&0xFCFCFCFCUL)>>2);\
+        l1=  (c&0x03030303UL)\
+           + (d&0x03030303UL);\
+        h1= ((c&0xFCFCFCFCUL)>>2)\
+          + ((d&0xFCFCFCFCUL)>>2);\
+        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+        /* second 4 bytes of the row */\
+        a= AV_RN32(&src1[i*src_stride1+4]);\
+        b= AV_RN32(&src2[i*src_stride2+4]);\
+        c= AV_RN32(&src3[i*src_stride3+4]);\
+        d= AV_RN32(&src4[i*src_stride4+4]);\
+        l0=  (a&0x03030303UL)\
+           + (b&0x03030303UL)\
+           + 0x01010101UL;\
+        h0= ((a&0xFCFCFCFCUL)>>2)\
+          + ((b&0xFCFCFCFCUL)>>2);\
+        l1=  (c&0x03030303UL)\
+           + (d&0x03030303UL);\
+        h1= ((c&0xFCFCFCFCUL)>>2)\
+          + ((d&0xFCFCFCFCUL)>>2);\
+        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+    }\
+}\
+/* 16-byte-wide l4: runs the 8-wide version on the left half, then on the right half (+8). */\
+static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
+                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
+    OPNAME ## _pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
+    OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
+}\
+/* 16-byte-wide no_rnd l4: runs the 8-wide no_rnd version on the left half, then on the right half (+8). */\
+static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
+                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
+    OPNAME ## _no_rnd_pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
+    OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
+}\
+\
+/* 2-pixel-wide 2x2 box filter: every output byte is (sum of the 2x2 neighbourhood + 2)>>2. */\
+/* a0/b0 hold the horizontal pair sums (plus the rounding 2) of one row and a1/b1 those of the */\
+/* next row; the loop emits two rows per pass, so an odd h still writes h+1 rows. */\
+/* NOTE(review): results are stored directly instead of through OP (see FIXME), so every */\
+/* instantiation of this function behaves like a plain put. */\
+/* Blank separator lines carry a continuation backslash so the macro body is not cut short. */\
+static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+        int i, a0, b0, a1, b1;\
+        a0= pixels[0];\
+        b0= pixels[1] + 2;\
+        a0 += b0;\
+        b0 += pixels[2];\
+\
+        pixels+=line_size;\
+        for(i=0; i<h; i+=2){\
+            a1= pixels[0];\
+            b1= pixels[1];\
+            a1 += b1;\
+            b1 += pixels[2];\
+\
+            block[0]= (a1+a0)>>2; /* FIXME non put */\
+            block[1]= (b1+b0)>>2;\
+\
+            pixels+=line_size;\
+            block +=line_size;\
+\
+            a0= pixels[0];\
+            b0= pixels[1] + 2;\
+            a0 += b0;\
+            b0 += pixels[2];\
+\
+            block[0]= (a1+a0)>>2;\
+            block[1]= (b1+b0)>>2;\
+            pixels+=line_size;\
+            block +=line_size;\
+        }\
+}\
+\
+/* 4-pixel-wide 2x2 box filter on packed 32-bit words: each byte passed to OP is */\
+/* (p[x] + p[x+1] + below[x] + below[x+1] + 2)>>2. l0/h0 carry the low-2-bit and high-6-bit */\
+/* horizontal sums of one row (with the 0x02020202 rounding term), l1/h1 those of the next row; */\
+/* the two halves of the loop body alternate which pair is refreshed, so every row is loaded once. */\
+/* Two rows are written per pass, so an odd h still writes h+1 rows. */\
+/* The blank separator line carries a continuation backslash so the macro body is not cut short. */\
+static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+        int i;\
+        const uint32_t a= AV_RN32(pixels  );\
+        const uint32_t b= AV_RN32(pixels+1);\
+        uint32_t l0=  (a&0x03030303UL)\
+                    + (b&0x03030303UL)\
+                    + 0x02020202UL;\
+        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
+                   + ((b&0xFCFCFCFCUL)>>2);\
+        uint32_t l1,h1;\
+\
+        pixels+=line_size;\
+        for(i=0; i<h; i+=2){\
+            uint32_t a= AV_RN32(pixels  );\
+            uint32_t b= AV_RN32(pixels+1);\
+            l1=  (a&0x03030303UL)\
+               + (b&0x03030303UL);\
+            h1= ((a&0xFCFCFCFCUL)>>2)\
+              + ((b&0xFCFCFCFCUL)>>2);\
+            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+            pixels+=line_size;\
+            block +=line_size;\
+            a= AV_RN32(pixels  );\
+            b= AV_RN32(pixels+1);\
+            l0=  (a&0x03030303UL)\
+               + (b&0x03030303UL)\
+               + 0x02020202UL;\
+            h0= ((a&0xFCFCFCFCUL)>>2)\
+              + ((b&0xFCFCFCFCUL)>>2);\
+            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+            pixels+=line_size;\
+            block +=line_size;\
+        }\
+}\
+\
+/* 8-pixel-wide 2x2 box filter with rounding term 2: the block is processed as two 4-byte */\
+/* columns (outer j loop). Within a column l0/h0 and l1/h1 hold the low-2-bit / high-6-bit */\
+/* horizontal sums of two consecutive rows and are refreshed alternately, two rows per pass */\
+/* (an odd h therefore still writes h+1 rows). After a column, pixels is rewound by the h+1 */\
+/* rows it consumed and block by the rows written, and both advance 4 bytes to the right. */\
+/* The blank separator line carries a continuation backslash so the macro body is not cut short. */\
+static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+    int j;\
+    for(j=0; j<2; j++){\
+        int i;\
+        const uint32_t a= AV_RN32(pixels  );\
+        const uint32_t b= AV_RN32(pixels+1);\
+        uint32_t l0=  (a&0x03030303UL)\
+                    + (b&0x03030303UL)\
+                    + 0x02020202UL;\
+        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
+                   + ((b&0xFCFCFCFCUL)>>2);\
+        uint32_t l1,h1;\
+\
+        pixels+=line_size;\
+        for(i=0; i<h; i+=2){\
+            uint32_t a= AV_RN32(pixels  );\
+            uint32_t b= AV_RN32(pixels+1);\
+            l1=  (a&0x03030303UL)\
+               + (b&0x03030303UL);\
+            h1= ((a&0xFCFCFCFCUL)>>2)\
+              + ((b&0xFCFCFCFCUL)>>2);\
+            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+            pixels+=line_size;\
+            block +=line_size;\
+            a= AV_RN32(pixels  );\
+            b= AV_RN32(pixels+1);\
+            l0=  (a&0x03030303UL)\
+               + (b&0x03030303UL)\
+               + 0x02020202UL;\
+            h0= ((a&0xFCFCFCFCUL)>>2)\
+              + ((b&0xFCFCFCFCUL)>>2);\
+            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+            pixels+=line_size;\
+            block +=line_size;\
+        }\
+        pixels+=4-line_size*(h+1);\
+        block +=4-line_size*h;\
+    }\
+}\
+\
+/* 8-pixel-wide 2x2 box filter with the smaller rounding term (0x01010101, i.e. +1 before */\
+/* the >>2). Same scheme as the rounding variant: two 4-byte columns (outer j loop), */\
+/* l0/h0 and l1/h1 hold the split horizontal sums of two consecutive rows and are refreshed */\
+/* alternately, two rows per pass (an odd h still writes h+1 rows); after a column the */\
+/* pointers are rewound and moved 4 bytes to the right. */\
+/* The blank separator line carries a continuation backslash so the macro body is not cut short. */\
+static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+    int j;\
+    for(j=0; j<2; j++){\
+        int i;\
+        const uint32_t a= AV_RN32(pixels  );\
+        const uint32_t b= AV_RN32(pixels+1);\
+        uint32_t l0=  (a&0x03030303UL)\
+                    + (b&0x03030303UL)\
+                    + 0x01010101UL;\
+        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
+                   + ((b&0xFCFCFCFCUL)>>2);\
+        uint32_t l1,h1;\
+\
+        pixels+=line_size;\
+        for(i=0; i<h; i+=2){\
+            uint32_t a= AV_RN32(pixels  );\
+            uint32_t b= AV_RN32(pixels+1);\
+            l1=  (a&0x03030303UL)\
+               + (b&0x03030303UL);\
+            h1= ((a&0xFCFCFCFCUL)>>2)\
+              + ((b&0xFCFCFCFCUL)>>2);\
+            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+            pixels+=line_size;\
+            block +=line_size;\
+            a= AV_RN32(pixels  );\
+            b= AV_RN32(pixels+1);\
+            l0=  (a&0x03030303UL)\
+               + (b&0x03030303UL)\
+               + 0x01010101UL;\
+            h0= ((a&0xFCFCFCFCUL)>>2)\
+              + ((b&0xFCFCFCFCUL)>>2);\
+            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+            pixels+=line_size;\
+            block +=line_size;\
+        }\
+        pixels+=4-line_size*(h+1);\
+        block +=4-line_size*h;\
+    }\
+}\
+\
+/* 16-pixel-wide entry points, generated from the 8-wide functions by CALL_2X_PIXELS */\
+/* (defined outside this chunk). no_rnd_pixels16_c is built from the plain _pixels8_c. */\
+CALL_2X_PIXELS(OPNAME ## _pixels16_c  , OPNAME ## _pixels8_c  , 8)\
+CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c  , OPNAME ## _pixels8_c         , 8)\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)
+/* The PIXOP2 body ends on the line above, which must not end in a backslash:
+ * otherwise the directive below would be spliced into the macro body. */
+/* Averaging op for this variant: per-byte average via rnd_avg32()
+ * (defined outside this chunk). */
+#define op_avg(a, b) a = rnd_avg32(a, b)
+#endif
+/* put: store the source value as is. */
+#define op_put(a, b) a = b
+/* Instantiate the pixel helpers twice: avg_* combine with the destination
+ * through op_avg, put_* overwrite it through op_put. */
+PIXOP2(avg, op_avg)
+PIXOP2(put, op_put)
+/* The op macros are dropped again after instantiation; they are redefined
+ * with different bodies further down in the file. */
+#undef op_avg
+#undef op_put
+/* Rounded integer means of two and of four values. Arguments are
+ * parenthesized so operators inside an argument (|, &, ?:, ...) cannot change
+ * the grouping of the sum. */
+#define avg2(a,b) (((a)+(b)+1)>>1)
+#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
+/* Single-stride front end for put_no_rnd_pixels16_l2(): dst, a and b all use the same stride. */
+static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
+    put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h);
+}
+/* Single-stride front end for put_no_rnd_pixels8_l2(): dst, a and b all use the same stride. */
+static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
+    put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h);
+}
+/*
+ * Bilinear interpolation of an 8-pixel-wide block, h rows high.
+ * x16 and y16 are the fractional offsets in 1/16 units; the four weights sum
+ * to 256, hence the final >>8. rounder is added before the shift.
+ * Each row reads 9 source pixels and the matching 9 pixels of the row below.
+ */
+static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
+{
+    const int w_tl = (16-x16)*(16-y16);   /* top-left     */
+    const int w_tr = x16*(16-y16);        /* top-right    */
+    const int w_bl = (16-x16)*y16;        /* bottom-left  */
+    const int w_br = x16*y16;             /* bottom-right */
+    int row, col;
+    for(row=0; row<h; row++){
+        const uint8_t *below = src + stride;
+        for(col=0; col<8; col++){
+            dst[col]= (w_tl*src[col] + w_tr*src[col+1] + w_bl*below[col] + w_br*below[col+1] + rounder)>>8;
+        }
+        dst+= stride;
+        src+= stride;
+    }
+}
+/*
+ * Warped 8-pixel-wide block fetch with bilinear interpolation, h rows.
+ * (ox, oy) is the source position of the first pixel; per output pixel the
+ * position advances by (dxx, dyx), per output row by (dxy, dyy).
+ * A position shifted right by 16 gives a coordinate whose low `shift` bits
+ * are the fraction and whose remaining bits are the integer pixel index.
+ * r is added before the final >>(2*shift). width/height bound the source:
+ * positions outside are clamped with av_clip() (defined outside this chunk)
+ * and interpolated only along the axis that is still inside.
+ */
+void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
+                  int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
+{
+    int y, vx, vy;
+    const int s= 1<<shift;
+    /* from here on width/height are the largest valid indices */
+    width--;
+    height--;
+    for(y=0; y<h; y++){
+        int x;
+        vx= ox;
+        vy= oy;
+        for(x=0; x<8; x++){ //XXX FIXME optimize
+            int src_x, src_y, frac_x, frac_y, index;
+            src_x= vx>>16;
+            src_y= vy>>16;
+            frac_x= src_x&(s-1);
+            frac_y= src_y&(s-1);
+            src_x>>=shift;
+            src_y>>=shift;
+            /* the unsigned compare rejects negative and too-large indices in one test */
+            if((unsigned)src_x < width){
+                if((unsigned)src_y < height){
+                    /* fully inside: interpolate in both directions */
+                    index= src_x + src_y*stride;
+                    dst[y*stride + x]= (  (  src[index         ]*(s-frac_x)
+                                           + src[index       +1]*   frac_x )*(s-frac_y)
+                                        + (  src[index+stride  ]*(s-frac_x)
+                                           + src[index+stride+1]*   frac_x )*   frac_y
+                                        + r)>>(shift*2);
+                }else{
+                    /* row outside: clamp y, interpolate horizontally only */
+                    index= src_x + av_clip(src_y, 0, height)*stride;
+                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
+                                          + src[index       +1]*   frac_x )*s
+                                        + r)>>(shift*2);
+                }
+            }else{
+                if((unsigned)src_y < height){
+                    /* column outside: clamp x, interpolate vertically only */
+                    index= av_clip(src_x, 0, width) + src_y*stride;
+                    dst[y*stride + x]= (  (  src[index         ]*(s-frac_y)
+                                           + src[index+stride  ]*   frac_y )*s
+                                        + r)>>(shift*2);
+                }else{
+                    /* both outside: copy the clamped pixel */
+                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
+                    dst[y*stride + x]=    src[index         ];
+                }
+            }
+            vx+= dxx;
+            vy+= dyx;
+        }
+        ox += dxy;
+        oy += dyy;
+    }
+}
+/* Offset (0,0): a plain copy, dispatched on block width to the matching
+ * put_pixelsN_c(). Widths other than 2, 4, 8 and 16 do nothing. */
+static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    if(width == 2){
+        put_pixels2_c (dst, src, stride, height);
+    }else if(width == 4){
+        put_pixels4_c (dst, src, stride, height);
+    }else if(width == 8){
+        put_pixels8_c (dst, src, stride, height);
+    }else if(width == 16){
+        put_pixels16_c(dst, src, stride, height);
+    }
+}
+/* Horizontal 2:1 blend of src[x] and src[x+1]; 683/2048 approximates the
+ * division by 3, the +1 rounds. */
+static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int row, col;
+    for (row = height; row > 0; row--) {
+      for (col = 0; col < width; col++) {
+        const int weighted = 2*src[col] + src[col+1] + 1;
+        dst[col] = (683*weighted) >> 11;
+      }
+      dst += stride;
+      src += stride;
+    }
+}
+/* Horizontal 1:2 blend of src[x] and src[x+1]; 683/2048 approximates the
+ * division by 3, the +1 rounds. */
+static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int row, col;
+    for (row = height; row > 0; row--) {
+      for (col = 0; col < width; col++) {
+        const int weighted = src[col] + 2*src[col+1] + 1;
+        dst[col] = (683*weighted) >> 11;
+      }
+      dst += stride;
+      src += stride;
+    }
+}
+/* Vertical 2:1 blend of src[x] and the pixel one row below; 683/2048
+ * approximates the division by 3, the +1 rounds. */
+static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int row, col;
+    for (row = height; row > 0; row--) {
+      const uint8_t *below = src + stride;
+      for (col = 0; col < width; col++) {
+        const int weighted = 2*src[col] + below[col] + 1;
+        dst[col] = (683*weighted) >> 11;
+      }
+      dst += stride;
+      src += stride;
+    }
+}
+/* 2x2 blend with weights 4:3 / 3:2 (sum 12); 2731/32768 approximates the
+ * division by 12, the +6 rounds. */
+static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int row, col;
+    for (row = height; row > 0; row--) {
+      const uint8_t *below = src + stride;
+      for (col = 0; col < width; col++) {
+        const int weighted = 4*src[col] + 3*src[col+1] + 3*below[col] + 2*below[col+1] + 6;
+        dst[col] = (2731*weighted) >> 15;
+      }
+      dst += stride;
+      src += stride;
+    }
+}
+/* 2x2 blend with weights 3:2 / 4:3 (sum 12); 2731/32768 approximates the
+ * division by 12, the +6 rounds. */
+static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int row, col;
+    for (row = height; row > 0; row--) {
+      const uint8_t *below = src + stride;
+      for (col = 0; col < width; col++) {
+        const int weighted = 3*src[col] + 2*src[col+1] + 4*below[col] + 3*below[col+1] + 6;
+        dst[col] = (2731*weighted) >> 15;
+      }
+      dst += stride;
+      src += stride;
+    }
+}
+/* Vertical 1:2 blend of src[x] and the pixel one row below; 683/2048
+ * approximates the division by 3, the +1 rounds. */
+static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int row, col;
+    for (row = height; row > 0; row--) {
+      const uint8_t *below = src + stride;
+      for (col = 0; col < width; col++) {
+        const int weighted = src[col] + 2*below[col] + 1;
+        dst[col] = (683*weighted) >> 11;
+      }
+      dst += stride;
+      src += stride;
+    }
+}
+/* 2x2 blend with weights 3:4 / 2:3 (sum 12); 2731/32768 approximates the
+ * division by 12, the +6 rounds. */
+static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int row, col;
+    for (row = height; row > 0; row--) {
+      const uint8_t *below = src + stride;
+      for (col = 0; col < width; col++) {
+        const int weighted = 3*src[col] + 4*src[col+1] + 2*below[col] + 3*below[col+1] + 6;
+        dst[col] = (2731*weighted) >> 15;
+      }
+      dst += stride;
+      src += stride;
+    }
+}
+/* 2x2 blend with weights 2:3 / 3:4 (sum 12); 2731/32768 approximates the
+ * division by 12, the +6 rounds. */
+static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int row, col;
+    for (row = height; row > 0; row--) {
+      const uint8_t *below = src + stride;
+      for (col = 0; col < width; col++) {
+        const int weighted = 2*src[col] + 3*src[col+1] + 3*below[col] + 4*below[col+1] + 6;
+        dst[col] = (2731*weighted) >> 15;
+      }
+      dst += stride;
+      src += stride;
+    }
+}
+/* Offset (0,0): averaging with the destination, dispatched on block width to
+ * the matching avg_pixelsN_c(). Widths other than 2, 4, 8 and 16 do nothing. */
+static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    if(width == 2){
+        avg_pixels2_c (dst, src, stride, height);
+    }else if(width == 4){
+        avg_pixels4_c (dst, src, stride, height);
+    }else if(width == 8){
+        avg_pixels8_c (dst, src, stride, height);
+    }else if(width == 16){
+        avg_pixels16_c(dst, src, stride, height);
+    }
+}
+/* Horizontal 2:1 blend of src[x] and src[x+1] (683/2048 approximates /3),
+ * then a rounded average with the value already in dst. */
+static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int row, col;
+    for (row = height; row > 0; row--) {
+      for (col = 0; col < width; col++) {
+        const int interp = (683*(2*src[col] + src[col+1] + 1)) >> 11;
+        dst[col] = (dst[col] + interp + 1) >> 1;
+      }
+      dst += stride;
+      src += stride;
+    }
+}
+/* Horizontal 1:2 blend of src[x] and src[x+1] (683/2048 approximates /3),
+ * then a rounded average with the value already in dst. */
+static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int row, col;
+    for (row = height; row > 0; row--) {
+      for (col = 0; col < width; col++) {
+        const int interp = (683*(src[col] + 2*src[col+1] + 1)) >> 11;
+        dst[col] = (dst[col] + interp + 1) >> 1;
+      }
+      dst += stride;
+      src += stride;
+    }
+}
+/* Vertical 2:1 blend of src[x] and the pixel one row below (683/2048
+ * approximates /3), then a rounded average with the value already in dst. */
+static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int row, col;
+    for (row = height; row > 0; row--) {
+      for (col = 0; col < width; col++) {
+        const int interp = (683*(2*src[col] + src[col+stride] + 1)) >> 11;
+        dst[col] = (dst[col] + interp + 1) >> 1;
+      }
+      dst += stride;
+      src += stride;
+    }
+}
+/* 2x2 blend with weights 4:3 / 3:2 (2731/32768 approximates /12), then a
+ * rounded average with the value already in dst. */
+static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int row, col;
+    for (row = height; row > 0; row--) {
+      for (col = 0; col < width; col++) {
+        const int interp = (2731*(4*src[col] + 3*src[col+1] + 3*src[col+stride] + 2*src[col+stride+1] + 6)) >> 15;
+        dst[col] = (dst[col] + interp + 1) >> 1;
+      }
+      dst += stride;
+      src += stride;
+    }
+}
+/* 2x2 blend with weights 3:2 / 4:3 (2731/32768 approximates /12), then a
+ * rounded average with the value already in dst. */
+static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int row, col;
+    for (row = height; row > 0; row--) {
+      for (col = 0; col < width; col++) {
+        const int interp = (2731*(3*src[col] + 2*src[col+1] + 4*src[col+stride] + 3*src[col+stride+1] + 6)) >> 15;
+        dst[col] = (dst[col] + interp + 1) >> 1;
+      }
+      dst += stride;
+      src += stride;
+    }
+}
+/* Vertical 1:2 blend of src[x] and the pixel one row below (683/2048
+ * approximates /3), then a rounded average with the value already in dst. */
+static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int row, col;
+    for (row = height; row > 0; row--) {
+      for (col = 0; col < width; col++) {
+        const int interp = (683*(src[col] + 2*src[col+stride] + 1)) >> 11;
+        dst[col] = (dst[col] + interp + 1) >> 1;
+      }
+      dst += stride;
+      src += stride;
+    }
+}
+/* 2x2 blend with weights 3:4 / 2:3 (2731/32768 approximates /12), then a
+ * rounded average with the value already in dst. */
+static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int row, col;
+    for (row = height; row > 0; row--) {
+      for (col = 0; col < width; col++) {
+        const int interp = (2731*(3*src[col] + 4*src[col+1] + 2*src[col+stride] + 3*src[col+stride+1] + 6)) >> 15;
+        dst[col] = (dst[col] + interp + 1) >> 1;
+      }
+      dst += stride;
+      src += stride;
+    }
+}
+/* 2x2 blend with weights 2:3 / 3:4 (2731/32768 approximates /12), then a
+ * rounded average with the value already in dst. */
+static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int row, col;
+    for (row = height; row > 0; row--) {
+      for (col = 0; col < width; col++) {
+        const int interp = (2731*(2*src[col] + 3*src[col+1] + 3*src[col+stride] + 4*src[col+stride+1] + 6)) >> 15;
+        dst[col] = (dst[col] + interp + 1) >> 1;
+      }
+      dst += stride;
+      src += stride;
+    }
+}
+#if 0
+/* Disabled. TPEL_WIDTH(width) would generate fixed-width front ends for the
+ * put_tpel_pixels_mcXY_c() functions. Each body is a plain call: a leading
+ * `void` would turn the statement into a declaration and call nothing. */
+#define TPEL_WIDTH(width)\
+static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
+    put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
+static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
+    put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
+static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
+    put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
+static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
+    put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
+static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
+    put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
+static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
+    put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
+static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
+    put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
+static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
+    put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
+static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
+    put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
+#endif
+/*
+ * H264_CHROMA_MC(OPNAME, OP): generates 2-, 4- and 8-pixel-wide bilinear
+ * chroma interpolators. x and y are fractional offsets in 1/8 units
+ * (asserted to lie in 0..7); the weights A..D sum to 64 and the unscaled
+ * weighted sum is handed to OP, which does the rounding and store.
+ */
+#define H264_CHROMA_MC(OPNAME, OP)\
+/* 2 pixels per row */\
+static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
+    const int A=(8-x)*(8-y);\
+    const int B=(  x)*(8-y);\
+    const int C=(8-x)*(  y);\
+    const int D=(  x)*(  y);\
+    int row, col;\
+\
+    assert(x<8 && y<8 && x>=0 && y>=0);\
+\
+    if(!D){\
+        /* x or y is 0: only one neighbour contributes, to the right or one row below */\
+        const int E= B+C;\
+        const int step= C ? stride : 1;\
+        for(row=0; row<h; row++){\
+            for(col=0; col<2; col++){\
+                OP(dst[col], (A*src[col] + E*src[step+col]));\
+            }\
+            dst+= stride;\
+            src+= stride;\
+        }\
+        return;\
+    }\
+    for(row=0; row<h; row++){\
+        for(col=0; col<2; col++){\
+            OP(dst[col], (A*src[col] + B*src[col+1] + C*src[stride+col] + D*src[stride+col+1]));\
+        }\
+        dst+= stride;\
+        src+= stride;\
+    }\
+}\
+\
+/* 4 pixels per row; x and y are fractional offsets in 1/8 units, weights A..D sum to 64. */\
+static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
+    const int A=(8-x)*(8-y);\
+    const int B=(  x)*(8-y);\
+    const int C=(8-x)*(  y);\
+    const int D=(  x)*(  y);\
+    int row, col;\
+\
+    assert(x<8 && y<8 && x>=0 && y>=0);\
+\
+    if(!D){\
+        /* x or y is 0: only one neighbour contributes, to the right or one row below */\
+        const int E= B+C;\
+        const int step= C ? stride : 1;\
+        for(row=0; row<h; row++){\
+            for(col=0; col<4; col++){\
+                OP(dst[col], (A*src[col] + E*src[step+col]));\
+            }\
+            dst+= stride;\
+            src+= stride;\
+        }\
+        return;\
+    }\
+    for(row=0; row<h; row++){\
+        for(col=0; col<4; col++){\
+            OP(dst[col], (A*src[col] + B*src[col+1] + C*src[stride+col] + D*src[stride+col+1]));\
+        }\
+        dst+= stride;\
+        src+= stride;\
+    }\
+}\
+\
+/* 8 pixels per row; x and y are fractional offsets in 1/8 units, weights A..D sum to 64. */\
+static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
+    const int A=(8-x)*(8-y);\
+    const int B=(  x)*(8-y);\
+    const int C=(8-x)*(  y);\
+    const int D=(  x)*(  y);\
+    int row, col;\
+\
+    assert(x<8 && y<8 && x>=0 && y>=0);\
+\
+    if(!D){\
+        /* x or y is 0: only one neighbour contributes, to the right or one row below */\
+        const int E= B+C;\
+        const int step= C ? stride : 1;\
+        for(row=0; row<h; row++){\
+            for(col=0; col<8; col++){\
+                OP(dst[col], (A*src[col] + E*src[step+col]));\
+            }\
+            dst+= stride;\
+            src+= stride;\
+        }\
+        return;\
+    }\
+    for(row=0; row<h; row++){\
+        for(col=0; col<8; col++){\
+            OP(dst[col], (A*src[col] + B*src[col+1] + C*src[stride+col] + D*src[stride+col+1]));\
+        }\
+        dst+= stride;\
+        src+= stride;\
+    }\
+}
+/* b is a weighted sum scaled by 64: (b+32)>>6 rounds it back to pixel range.
+ * op_put stores that value; op_avg then takes the rounded mean with the
+ * value already in the destination. */
+#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
+#define op_put(a, b) a = (((b) + 32)>>6)
+/* Instantiate the put_h264_chroma_mcN_c and avg_h264_chroma_mcN_c families. */
+H264_CHROMA_MC(put_       , op_put)
+H264_CHROMA_MC(avg_       , op_avg)
+#undef op_avg
+#undef op_put
+/*
+ * 8-pixel-wide bilinear chroma interpolation with a reduced rounding term:
+ * 32 - 4 is added before the >>6 instead of 32. x and y are fractional
+ * offsets in 1/8 units (asserted to lie in 0..7); the weights sum to 64.
+ * Each row reads 9 source pixels and the matching 9 pixels of the row below.
+ */
+static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
+    const int A=(8-x)*(8-y);
+    const int B=(  x)*(8-y);
+    const int C=(8-x)*(  y);
+    const int D=(  x)*(  y);
+    const int bias = 32 - 4;
+    int row, col;
+    assert(x<8 && y<8 && x>=0 && y>=0);
+    for(row=0; row<h; row++){
+        const uint8_t *below = src + stride;
+        for(col=0; col<8; col++){
+            dst[col] = (A*src[col] + B*src[col+1] + C*below[col] + D*below[col+1] + bias) >> 6;
+        }
+        dst+= stride;
+        src+= stride;
+    }
+}
+/*
+ * Averaging counterpart of put_no_rnd_vc1_chroma_mc8_c: the interpolated
+ * value (rounding term 32 - 4, then >>6) is combined with the pixel already
+ * in dst through avg2(). x and y are fractional offsets in 1/8 units
+ * (asserted to lie in 0..7); the weights sum to 64.
+ */
+static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
+    const int A=(8-x)*(8-y);
+    const int B=(  x)*(8-y);
+    const int C=(8-x)*(  y);
+    const int D=(  x)*(  y);
+    const int bias = 32 - 4;
+    int row, col;
+    assert(x<8 && y<8 && x>=0 && y>=0);
+    for(row=0; row<h; row++){
+        const uint8_t *below = src + stride;
+        for(col=0; col<8; col++){
+            dst[col] = avg2(dst[col], ((A*src[col] + B*src[col+1] + C*below[col] + D*below[col+1] + bias) >> 6));
+        }
+        dst+= stride;
+        src+= stride;
+    }
+}
+/**
+ * Template that emits one complete family of quarter-pel motion compensation
+ * functions (four lowpass filters plus the qpel8_mcXY / qpel16_mcXY entry
+ * points) for a single store operation.
+ *
+ * r      : not referenced anywhere in the macro body.
+ * OPNAME : prefix pasted onto the name of every generated function and onto
+ *          the pixels*_c / pixels*_l2 / pixels*_l4 helpers used for the final
+ *          store.
+ * RND    : infix pasted between "put" and the helper name for all
+ *          intermediate results, which are always written with a put
+ *          variant regardless of OPNAME.
+ * OP     : store macro invoked as OP(destination, filter_sum).  Every lowpass
+ *          function declares a local cm pointer into ff_cropTbl for OP to
+ *          use.
+ *
+ * The lowpass filters weight symmetric sample pairs with 20, -6, 3 and -1.
+ * Taps that would fall outside the 9 (or 17) samples read are folded back
+ * inside that range.
+ *
+ * In the mcXY entry points X and Y look like the horizontal and vertical
+ * quarter-sample offsets (mc20 is the plain horizontal filter, mc02 the plain
+ * vertical one) -- confirm against the callers.
+ */
+#define QPEL_MC(r, OPNAME, RND, OP) \
+/* Horizontal filter: h rows, 8 outputs per row, reads src[0..8] of each row. */\
+static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<h; i++)\
+    {\
+        OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
+        OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
+        OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
+        OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
+        OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
+        OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
+        OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
+        OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
+        dst+=dstStride;\
+        src+=srcStride;\
+    }\
+}\
+\
+/* Vertical filter: 8 columns, 8 outputs per column, reads 9 source rows. */\
+static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    const int w=8;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<w; i++)\
+    {\
+        const int src0= src[0*srcStride];\
+        const int src1= src[1*srcStride];\
+        const int src2= src[2*srcStride];\
+        const int src3= src[3*srcStride];\
+        const int src4= src[4*srcStride];\
+        const int src5= src[5*srcStride];\
+        const int src6= src[6*srcStride];\
+        const int src7= src[7*srcStride];\
+        const int src8= src[8*srcStride];\
+        OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
+        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
+        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
+        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
+        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
+        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
+        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
+        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
+        dst++;\
+        src++;\
+    }\
+}\
+\
+/* Horizontal filter: h rows, 16 outputs per row, reads src[0..16] of each row. */\
+static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    \
+    for(i=0; i<h; i++)\
+    {\
+        OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
+        OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
+        OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
+        OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
+        OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
+        OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
+        OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
+        OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
+        OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
+        OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
+        OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
+        OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
+        OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
+        OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
+        OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
+        OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
+        dst+=dstStride;\
+        src+=srcStride;\
+    }\
+}\
+\
+/* Vertical filter: 16 columns, 16 outputs per column, reads 17 source rows. */\
+static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    const int w=16;\
+    for(i=0; i<w; i++)\
+    {\
+        const int src0= src[0*srcStride];\
+        const int src1= src[1*srcStride];\
+        const int src2= src[2*srcStride];\
+        const int src3= src[3*srcStride];\
+        const int src4= src[4*srcStride];\
+        const int src5= src[5*srcStride];\
+        const int src6= src[6*srcStride];\
+        const int src7= src[7*srcStride];\
+        const int src8= src[8*srcStride];\
+        const int src9= src[9*srcStride];\
+        const int src10= src[10*srcStride];\
+        const int src11= src[11*srcStride];\
+        const int src12= src[12*srcStride];\
+        const int src13= src[13*srcStride];\
+        const int src14= src[14*srcStride];\
+        const int src15= src[15*srcStride];\
+        const int src16= src[16*srcStride];\
+        OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
+        OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
+        OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
+        OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
+        OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
+        OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
+        OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
+        OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
+        OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
+        OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
+        OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
+        OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
+        OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
+        OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
+        OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
+        OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
+        dst++;\
+        src++;\
+    }\
+}\
+\
+/* 8x8 entry points.  "full" is a 9-row copy of the source with a row pitch of 16; halfH holds 9 filtered rows of 8. */\
+static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## pixels8_c(dst, src, stride, 8);\
+}\
+\
+static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t half[64];\
+    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
+    OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
+}\
+\
+static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
+}\
+\
+static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t half[64];\
+    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
+    OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
+}\
+\
+static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t half[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
+    OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
+}\
+\
+static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    copy_block9(full, src, 16, stride, 9);\
+    OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
+}\
+\
+static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t half[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
+    OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
+}\
+/* The non-static ff_..._old_c variants build separate H, V and HV planes and blend them (four-way via pixels8_l4 for the diagonal cases). */\
+void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfV[64];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
+}\
+void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfV[64];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
+}\
+void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfV[64];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
+}\
+void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfV[64];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full  , 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t halfH[72];\
+    uint8_t halfHV[64];\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t halfH[72];\
+    uint8_t halfHV[64];\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
+}\
+void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfV[64];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
+    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
+}\
+void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfV[64];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
+    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
+}\
+static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t halfH[72];\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
+    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
+}\
+/* 16x16 entry points, structured like the 8x8 ones.  "full" is a 17-row copy with a row pitch of 24; halfH holds 17 filtered rows of 16. */\
+static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## pixels16_c(dst, src, stride, 16);\
+}\
+\
+static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t half[256];\
+    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
+    OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
+}\
+\
+static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
+}\
+\
+static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t half[256];\
+    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
+    OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
+}\
+\
+static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t half[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
+    OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
+}\
+\
+static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    copy_block17(full, src, 24, stride, 17);\
+    OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
+}\
+\
+static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t half[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
+    OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
+}\
+void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfV[256];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
+}\
+void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfV[256];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
+}\
+void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfV[256];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
+}\
+void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfV[256];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full  , 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t halfH[272];\
+    uint8_t halfHV[256];\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t halfH[272];\
+    uint8_t halfHV[256];\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
+}\
+void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfV[256];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
+    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
+}\
+void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfV[256];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
+    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
+}\
+static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t halfH[272];\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
+    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
+}
+/* Store operators handed to QPEL_MC as its OP argument.  b is the raw
+ * lowpass filter sum; the filter weights (20, -6, 3, -1 on each side) total
+ * 32, so the sum is scaled down by 5 bits and then looked up in cm[], the
+ * local pointer into ff_cropTbl that every generated lowpass function
+ * declares (presumably a clip-to-8-bit table -- confirm at its definition).
+ * The plain variants add 16 before the shift, the no_rnd variants add 15.
+ * The avg variants then average with the value already held in a; only the
+ * plain op_avg adds 1 before halving. */
+#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
+#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
+#define op_put(a, b) a = cm[((b) + 16)>>5]
+#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
+/* Generate the put_, put_no_rnd_ and avg_ function families.  The fourth
+ * instantiation is disabled, which leaves op_avg_no_rnd without any user
+ * among the instantiations listed here. */
+QPEL_MC(0, put_       , _       , op_put)
+QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
+QPEL_MC(0, avg_       , _       , op_avg)
+//QPEL_MC(1, avg_no_rnd , _       , op_avg)
+/* Remove the operators so that later templates can reuse the names. */
+#undef op_avg
+#undef op_avg_no_rnd
+#undef op_put
+#undef op_put_no_rnd
+#if 1
+#define H264_LOWPASS(OPNAME, OP, OP2) \
+static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    const int h=2;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<h; i++)\
+    {\
+        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
+        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
+        dst+=dstStride;\
+        src+=srcStride;\
+    }\
+}\
+\
+static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    const int w=2;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<w; i++)\
+    {\
+        const int srcB= src[-2*srcStride];\
+        const int srcA= src[-1*srcStride];\
+        const int src0= src[0 *srcStride];\
+        const int src1= src[1 *srcStride];\
+        const int src2= src[2 *srcStride];\
+        const int src3= src[3 *srcStride];\
+        const int src4= src[4 *srcStride];\
+        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
+        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
+        dst++;\
+        src++;\
+    }\
+}\
+\
+static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+    const int h=2;\
+    const int w=2;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    src -= 2*srcStride;\
+    for(i=0; i<h+5; i++)\
+    {\
+        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
+        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
+        tmp+=tmpStride;\
+        src+=srcStride;\
+    }\
+    tmp -= tmpStride*(h+5-2);\
+    for(i=0; i<w; i++)\
+    {\
+        const int tmpB= tmp[-2*tmpStride];\
+        const int tmpA= tmp[-1*tmpStride];\
+        const int tmp0= tmp[0 *tmpStride];\
+        const int tmp1= tmp[1 *tmpStride];\
+        const int tmp2= tmp[2 *tmpStride];\
+        const int tmp3= tmp[3 *tmpStride];\
+        const int tmp4= tmp[4 *tmpStride];\
+        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
+        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
+        dst++;\
+        tmp++;\
+    }\
+}\
+static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    const int h=4;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<h; i++)\
+    {\
+        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
+        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
+        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
+        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
+        dst+=dstStride;\
+        src+=srcStride;\
+    }\
+}\
+\
+static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    const int w=4;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<w; i++)\
+    {\
+        const int srcB= src[-2*srcStride];\
+        const int srcA= src[-1*srcStride];\
+        const int src0= src[0 *srcStride];\
+        const int src1= src[1 *srcStride];\
+        const int src2= src[2 *srcStride];\
+        const int src3= src[3 *srcStride];\
+        const int src4= src[4 *srcStride];\
+        const int src5= src[5 *srcStride];\
+        const int src6= src[6 *srcStride];\
+        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
+        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
+        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
+        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
+        dst++;\
+        src++;\
+    }\
+}\
+\
+static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+    const int h=4;\
+    const int w=4;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    src -= 2*srcStride;\
+    for(i=0; i<h+5; i++)\
+    {\
+        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
+        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
+        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
+        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
+        tmp+=tmpStride;\
+        src+=srcStride;\
+    }\
+    tmp -= tmpStride*(h+5-2);\
+    for(i=0; i<w; i++)\
+    {\
+        const int tmpB= tmp[-2*tmpStride];\
+        const int tmpA= tmp[-1*tmpStride];\
+        const int tmp0= tmp[0 *tmpStride];\
+        const int tmp1= tmp[1 *tmpStride];\
+        const int tmp2= tmp[2 *tmpStride];\
+        const int tmp3= tmp[3 *tmpStride];\
+        const int tmp4= tmp[4 *tmpStride];\
+        const int tmp5= tmp[5 *tmpStride];\
+        const int tmp6= tmp[6 *tmpStride];\
+        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
+        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
+        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
+        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
+        dst++;\
+        tmp++;\
+    }\
+}\
+\
+static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    const int h=8;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<h; i++)\
+    {\
+        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
+        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
+        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
+        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
+        OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
+        OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
+        OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
+        OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
+        dst+=dstStride;\
+        src+=srcStride;\
+    }\
+}\
+\
+static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    const int w=8;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<w; i++)\
+    {\
+        const int srcB= src[-2*srcStride];\
+        const int srcA= src[-1*srcStride];\
+        const int src0= src[0 *srcStride];\
+        const int src1= src[1 *srcStride];\
+        const int src2= src[2 *srcStride];\
+        const int src3= src[3 *srcStride];\
+        const int src4= src[4 *srcStride];\
+        const int src5= src[5 *srcStride];\
+        const int src6= src[6 *srcStride];\
+        const int src7= src[7 *srcStride];\
+        const int src8= src[8 *srcStride];\
+        const int src9= src[9 *srcStride];\
+        const int src10=src[10*srcStride];\
+        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
+        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
+        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
+        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
+        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
+        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
+        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
+        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
+        dst++;\
+        src++;\
+    }\
+}\
+\
+static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+    const int h=8;\
+    const int w=8;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    src -= 2*srcStride;\
+    for(i=0; i<h+5; i++)\
+    {\
+        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
+        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
+        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
+        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
+        tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
+        tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
+        tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
+        tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
+        tmp+=tmpStride;\
+        src+=srcStride;\
+    }\
+    tmp -= tmpStride*(h+5-2);\
+    for(i=0; i<w; i++)\
+    {\
+        const int tmpB= tmp[-2*tmpStride];\
+        const int tmpA= tmp[-1*tmpStride];\
+        const int tmp0= tmp[0 *tmpStride];\
+        const int tmp1= tmp[1 *tmpStride];\
+        const int tmp2= tmp[2 *tmpStride];\
+        const int tmp3= tmp[3 *tmpStride];\
+        const int tmp4= tmp[4 *tmpStride];\
+        const int tmp5= tmp[5 *tmpStride];\
+        const int tmp6= tmp[6 *tmpStride];\
+        const int tmp7= tmp[7 *tmpStride];\
+        const int tmp8= tmp[8 *tmpStride];\
+        const int tmp9= tmp[9 *tmpStride];\
+        const int tmp10=tmp[10*tmpStride];\
+        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
+        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
+        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
+        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
+        OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
+        OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
+        OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
+        OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
+        dst++;\
+        tmp++;\
+    }\
+}\
+\
+static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
+    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
+    src += 8*srcStride;\
+    dst += 8*dstStride;\
+    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
+    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
+}\
+\
+static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
+    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
+    src += 8*srcStride;\
+    dst += 8*dstStride;\
+    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
+    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
+}\
+\
+static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
+    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
+    src += 8*srcStride;\
+    dst += 8*dstStride;\
+    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
+    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
+}\
+/**
+ * Generates the 16 functions OPNAME h264_qpel SIZE _mcXY_c(dst, src, stride),
+ * for X and Y in 0..3 (presumably the horizontal and vertical quarter-sample
+ * offsets -- naming convention, confirm against the callers).
+ *  - mc00 forwards to OPNAME pixels SIZE _c.
+ *  - mc20, mc02 and mc22 run the OPNAME flavoured h / v / hv lowpass filter
+ *    straight into dst.
+ *  - every other variant builds one or two SIZE x SIZE intermediates with the
+ *    put_ flavoured lowpass filters in stack buffers and combines two sources
+ *    through OPNAME pixels SIZE _l2.
+ * Variants that filter vertically first copy SIZE+5 rows, starting two rows
+ * above src, into the packed buffer "full" with copy_block SIZE; full_mid
+ * then addresses the row that corresponds to src itself.
+ */
+#define H264_MC(OPNAME, SIZE) \
+static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t half[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t half[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
+    /* full: SIZE+5 packed rows copied from two rows above src; full_mid: the row matching src */\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    uint8_t half[SIZE*SIZE];\
+    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
+    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    uint8_t half[SIZE*SIZE];\
+    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    uint8_t halfH[SIZE*SIZE];\
+    uint8_t halfV[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
+    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    uint8_t halfH[SIZE*SIZE];\
+    uint8_t halfV[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
+    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    uint8_t halfH[SIZE*SIZE];\
+    uint8_t halfV[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
+    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    uint8_t halfH[SIZE*SIZE];\
+    uint8_t halfV[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
+    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
+    int16_t tmp[SIZE*(SIZE+5)];\
+    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
+    int16_t tmp[SIZE*(SIZE+5)];\
+    uint8_t halfH[SIZE*SIZE];\
+    uint8_t halfHV[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
+    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
+    int16_t tmp[SIZE*(SIZE+5)];\
+    uint8_t halfH[SIZE*SIZE];\
+    uint8_t halfHV[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
+    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    int16_t tmp[SIZE*(SIZE+5)];\
+    uint8_t halfV[SIZE*SIZE];\
+    uint8_t halfHV[SIZE*SIZE];\
+    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
+    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    int16_t tmp[SIZE*(SIZE+5)];\
+    uint8_t halfV[SIZE*SIZE];\
+    uint8_t halfHV[SIZE*SIZE];\
+    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
+    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
+}\
+/*
+ * Store operators handed to H264_LOWPASS as OP / OP2. All of them index a
+ * table named cm, which they do not define: it must be in scope wherever the
+ * macro is expanded.
+ *   op_put  / op_avg  : round with +16 and shift right by 5
+ *   op2_put / op2_avg : round with +512 and shift right by 10
+ * The avg forms then take the rounded-up mean of the looked-up value and the
+ * value already stored in a.
+ */
+#define op_avg(a, b)  a = (((a)+cm[((b) + 16)>>5]+1)>>1)
+//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
+#define op_put(a, b)  a = cm[((b) + 16)>>5]
+#define op2_avg(a, b)  a = (((a)+cm[((b) + 512)>>10]+1)>>1)
+#define op2_put(a, b)  a = cm[((b) + 512)>>10]
+/* Instantiate the lowpass filters for both flavours ... */
+H264_LOWPASS(put_       , op_put, op2_put)
+H264_LOWPASS(avg_       , op_avg, op2_avg)
+/* ... and the mcXY entry points: put_ for sizes 2, 4, 8, 16; avg_ for 4, 8, 16 only. */
+H264_MC(put_, 2)
+H264_MC(put_, 4)
+H264_MC(put_, 8)
+H264_MC(put_, 16)
+H264_MC(avg_, 4)
+H264_MC(avg_, 8)
+H264_MC(avg_, 16)
+#undef op_avg
+#undef op_put
+#undef op2_avg
+#undef op2_put
+/* closes a conditional opened before this part of the file */
+#endif
+/**
+ * Horizontal 4-tap (-1, 9, 9, -1) filter applied to h rows of 8 samples.
+ * Every result is rounded with +8, shifted right by 4 and looked up in
+ * ff_cropTbl (offset by MAX_NEG_CROP). Each row reads src[-1] .. src[9].
+ * @param dst       output, 8 bytes per row, rows dstStride apart
+ * @param src       input, rows srcStride apart
+ * @param h         number of rows to filter
+ */
+static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
+    uint8_t *const clip = ff_cropTbl + MAX_NEG_CROP;
+    int row, col;
+    for(row=0; row<h; row++){
+        for(col=0; col<8; col++){
+            const int inner = src[col    ] + src[col + 1];
+            const int outer = src[col - 1] + src[col + 2];
+            dst[col]= clip[(9*inner - outer + 8)>>4];
+        }
+        dst+=dstStride;
+        src+=srcStride;
+    }
+}
+#if CONFIG_CAVS_DECODER
+/* AVS specific */
+/* The four mc00 entry points below only forward to the generic put/avg
+ * pixel routines, passing the block size (8 or 16) as the height argument. */
+void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
+    put_pixels8_c(dst, src, stride, 8);
+}
+void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
+    avg_pixels8_c(dst, src, stride, 8);
+}
+void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
+    put_pixels16_c(dst, src, stride, 16);
+}
+void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
+    avg_pixels16_c(dst, src, stride, 16);
+}
+#endif /* CONFIG_CAVS_DECODER */
+#if CONFIG_VC1_DECODER
+/* VC-1 specific */
+/* mc00 entry points: forward to the generic 8-wide put/avg routines with a
+ * height of 8. The rnd argument is accepted but not used here. */
+void ff_put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
+    put_pixels8_c(dst, src, stride, 8);
+}
+void ff_avg_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
+    avg_pixels8_c(dst, src, stride, 8);
+}
+#endif /* CONFIG_VC1_DECODER */
+#if CONFIG_RV40_DECODER
+/* RV40 mc33 entry points: each forwards to the matching put/avg *_xy2_c pixel
+ * routine, passing the block size (16 or 8) as the height argument. */
+static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
+    put_pixels16_xy2_c(dst, src, stride, 16);
+}
+static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
+    avg_pixels16_xy2_c(dst, src, stride, 16);
+}
+static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
+    put_pixels8_xy2_c(dst, src, stride, 8);
+}
+static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
+    avg_pixels8_xy2_c(dst, src, stride, 8);
+}
+#endif /* CONFIG_RV40_DECODER */
+/**
+ * Vertical 4-tap (-1, 9, 9, -1) filter applied to w columns of 8 samples.
+ * For each column the 11 input rows -1 .. 9 are read first, then the 8
+ * results are written; each is rounded with +8, shifted right by 4 and looked
+ * up in ff_cropTbl (offset by MAX_NEG_CROP).
+ * @param dst       output, rows dstStride apart
+ * @param src       input, rows srcStride apart
+ * @param w         number of columns to filter
+ */
+static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
+    uint8_t *const clip = ff_cropTbl + MAX_NEG_CROP;
+    int col, k;
+    for(col=0; col<w; col++){
+        int tap[11]; /* tap[k] holds input row k-1 of this column */
+        for(k=0; k<11; k++)
+            tap[k]= src[(k-1)*srcStride];
+        for(k=0; k<8; k++)
+            dst[k*dstStride]= clip[(9*(tap[k+1] + tap[k+2]) - (tap[k] + tap[k+3]) + 8)>>4];
+        src++;
+        dst++;
+    }
+}
+/** mc00: forwards to put_pixels8_c with a height of 8; no filtering is applied here. */
+static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
+    put_pixels8_c(dst, src, stride, 8);
+}
+/**
+ * mc10: horizontally filters 8 rows into a packed 8x8 buffer, then hands src
+ * and that buffer to put_pixels8_l2 (presumably a two-source average; it is
+ * defined elsewhere).
+ */
+static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
+    uint8_t half[64];
+    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
+    put_pixels8_l2(dst, src, half, stride, stride, 8, 8);
+}
+/** mc20: writes the horizontally filtered 8x8 block straight into dst. */
+static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
+    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
+}
+/**
+ * mc30: same as mc10 except that the unfiltered source passed to
+ * put_pixels8_l2 starts one sample to the right (src+1).
+ */
+static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
+    uint8_t half[64];
+    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
+    put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8);
+}
+/** mc02: writes the vertically filtered 8x8 block straight into dst. */
+static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
+    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
+}
+/**
+ * mc12: combines a vertically filtered block (halfV) with a block filtered
+ * horizontally and then vertically (halfHV) through put_pixels8_l2.
+ */
+static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
+    uint8_t halfH[88]; /* 11 rows of 8: source rows -1 .. 9, as needed by the vertical pass */
+    uint8_t halfV[64];
+    uint8_t halfHV[64];
+    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
+    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
+    /* halfH+8 skips the extra top row so that it lines up with src */
+    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
+    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
+}
+/**
+ * mc32: same as mc12 except that the purely vertical pass reads from one
+ * sample to the right (src+1).
+ */
+static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
+    uint8_t halfH[88]; /* 11 rows of 8: source rows -1 .. 9, as needed by the vertical pass */
+    uint8_t halfV[64];
+    uint8_t halfHV[64];
+    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
+    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
+    /* halfH+8 skips the extra top row so that it lines up with src */
+    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
+    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
+}
+/**
+ * mc22: horizontal pass over 11 rows (source rows -1 .. 9) into halfH, then a
+ * vertical pass over that buffer written straight into dst.
+ */
+static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
+    uint8_t halfH[88];
+    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
+    /* halfH+8 skips the extra top row so that it lines up with src */
+    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
+}
+/**
+ * Filters 8 columns in place, each over the four vertically adjacent samples
+ * at rows -2, -1, 0 and +1 relative to src.
+ * The strength is read from ff_h263_loop_filter_strength[qscale]. The two
+ * inner samples are moved by d1 in opposite directions, the two outer samples
+ * by a correction limited to +-(|d1|/2).
+ * The body is compiled only when an H.263 decoder or encoder is configured.
+ */
+static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
+    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
+    const int strength= ff_h263_loop_filter_strength[qscale];
+    const int limit= 2*strength;
+    int col;
+    for(col=0; col<8; col++){
+        uint8_t *const pix= src + col;
+        const int outer_a= pix[-2*stride];
+        int inner_a= pix[-1*stride];
+        int inner_b= pix[ 0*stride];
+        const int outer_b= pix[ 1*stride];
+        const int d = (outer_a - outer_b + 4*(inner_b - inner_a)) / 8;
+        int d1, d2, ad1;
+        /* piecewise ramp: identity inside +-strength, falling back to 0 at +-2*strength */
+        if     (d< -limit   ) d1= 0;
+        else if(d< -strength) d1= -limit - d;
+        else if(d<  strength) d1= d;
+        else if(d<  limit   ) d1= limit - d;
+        else                  d1= 0;
+        inner_a += d1;
+        inner_b -= d1;
+        /* bit 8 set: the value left 0..255; negative becomes 0, too large becomes -1 (stored as 255) */
+        if(inner_a&256) inner_a= ~(inner_a>>31);
+        if(inner_b&256) inner_b= ~(inner_b>>31);
+        pix[-1*stride] = inner_a;
+        pix[ 0*stride] = inner_b;
+        ad1= FFABS(d1)>>1;
+        d2= av_clip((outer_a-outer_b)/4, -ad1, ad1);
+        pix[-2*stride] = outer_a - d2;
+        pix[ 1*stride] = outer_b + d2;
+    }
+    }
+}
+/**
+ * Filters 8 rows in place, each over the four horizontally adjacent samples
+ * at columns -2, -1, 0 and +1 relative to src.
+ * The strength is read from ff_h263_loop_filter_strength[qscale]. The two
+ * inner samples are moved by d1 in opposite directions, the two outer samples
+ * by a correction limited to +-(|d1|/2).
+ * The body is compiled only when an H.263 decoder or encoder is configured.
+ */
+static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
+    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
+    const int strength= ff_h263_loop_filter_strength[qscale];
+    const int limit= 2*strength;
+    int row;
+    for(row=0; row<8; row++){
+        uint8_t *const pix= src + row*stride;
+        const int outer_a= pix[-2];
+        int inner_a= pix[-1];
+        int inner_b= pix[ 0];
+        const int outer_b= pix[ 1];
+        const int d = (outer_a - outer_b + 4*(inner_b - inner_a)) / 8;
+        int d1, d2, ad1;
+        /* piecewise ramp: identity inside +-strength, falling back to 0 at +-2*strength */
+        if     (d< -limit   ) d1= 0;
+        else if(d< -strength) d1= -limit - d;
+        else if(d<  strength) d1= d;
+        else if(d<  limit   ) d1= limit - d;
+        else                  d1= 0;
+        inner_a += d1;
+        inner_b -= d1;
+        /* bit 8 set: the value left 0..255; negative becomes 0, too large becomes -1 (stored as 255) */
+        if(inner_a&256) inner_a= ~(inner_a>>31);
+        if(inner_b&256) inner_b= ~(inner_b>>31);
+        pix[-1] = inner_a;
+        pix[ 0] = inner_b;
+        ad1= FFABS(d1)>>1;
+        d2= av_clip((outer_a-outer_b)/4, -ad1, ad1);
+        pix[-2] = outer_a - d2;
+        pix[ 1] = outer_b + d2;
+    }
+    }
+}
+/**
+ * Smooths an 8x8 block in place with a separable (1, 2, 1) filter.
+ * The vertical pass goes into a temporary: rows 0 and 7 are only scaled by 4,
+ * rows 1..6 get the 3-tap sum. The horizontal pass writes back to src:
+ * columns 0 and 7 only undo the vertical scale ((t+2)>>2), columns 1..6 get
+ * the 3-tap sum normalised with (t+8)>>4.
+ * @param src    top-left sample of the block
+ * @param stride distance between rows of src
+ */
+static void h261_loop_filter_c(uint8_t *src, int stride){
+    int vert[8][8];
+    int row, col;
+    for(col=0; col<8; col++){
+        vert[0][col] = 4*src[col           ];
+        vert[7][col] = 4*src[col + 7*stride];
+    }
+    for(row=1; row<7; row++){
+        const uint8_t *line = src + row*stride;
+        for(col=0; col<8; col++)
+            vert[row][col] = line[col - stride] + 2*line[col] + line[col + stride];
+    }
+    for(row=0; row<8; row++){
+        uint8_t *out = src + row*stride;
+        const int *in = vert[row];
+        out[0] = (in[0] + 2)>>2;
+        out[7] = (in[7] + 2)>>2;
+        for(col=1; col<7; col++)
+            out[col] = (in[col-1] + 2*in[col] + in[col+1] + 8)>>4;
+    }
+}
+/**
+ * Sum of absolute differences between two blocks 16 samples wide and h rows
+ * high; both blocks use line_size as row distance. v is not used.
+ */
+static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int sad = 0;
+    int row, col;
+    for(row=0; row<h; row++) {
+        for(col=0; col<16; col++)
+            sad += abs(pix1[col] - pix2[col]);
+        pix1 += line_size;
+        pix2 += line_size;
+    }
+    return sad;
+}
+/**
+ * Sum of absolute differences between a 16-wide block (pix1) and a reference
+ * formed as avg2() of each pair of horizontally adjacent samples of pix2.
+ * Reads 17 samples per row of pix2. v is not used.
+ */
+static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int sad = 0;
+    int row, col;
+    for(row=0; row<h; row++) {
+        for(col=0; col<16; col++)
+            sad += abs(pix1[col] - avg2(pix2[col], pix2[col+1]));
+        pix1 += line_size;
+        pix2 += line_size;
+    }
+    return sad;
+}
+/**
+ * Sum of absolute differences between a 16-wide block (pix1) and a reference
+ * formed as avg2() of each sample of pix2 and the sample one row below it.
+ * Reads h+1 rows of pix2. v is not used.
+ */
+static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int sad = 0;
+    int row, col;
+    for(row=0; row<h; row++) {
+        uint8_t *below = pix2 + line_size;
+        for(col=0; col<16; col++)
+            sad += abs(pix1[col] - avg2(pix2[col], below[col]));
+        pix1 += line_size;
+        pix2 = below;
+    }
+    return sad;
+}
+/**
+ * Sum of absolute differences between a 16-wide block (pix1) and a reference
+ * formed as avg4() of each 2x2 neighbourhood of pix2.
+ * Reads 17 samples per row and h+1 rows of pix2. v is not used.
+ */
+static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int sad = 0;
+    int row, col;
+    for(row=0; row<h; row++) {
+        uint8_t *below = pix2 + line_size;
+        for(col=0; col<16; col++)
+            sad += abs(pix1[col] - avg4(pix2[col], pix2[col+1], below[col], below[col+1]));
+        pix1 += line_size;
+        pix2 = below;
+    }
+    return sad;
+}
+/**
+ * Sum of absolute differences between two blocks 8 samples wide and h rows
+ * high; both blocks use line_size as row distance. v is not used.
+ */
+static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int sad = 0;
+    int row, col;
+    for(row=0; row<h; row++) {
+        for(col=0; col<8; col++)
+            sad += abs(pix1[col] - pix2[col]);
+        pix1 += line_size;
+        pix2 += line_size;
+    }
+    return sad;
+}
+/**
+ * Sum of absolute differences between an 8-wide block (pix1) and a reference
+ * formed as avg2() of each pair of horizontally adjacent samples of pix2.
+ * Reads 9 samples per row of pix2. v is not used.
+ */
+static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int sad = 0;
+    int row, col;
+    for(row=0; row<h; row++) {
+        for(col=0; col<8; col++)
+            sad += abs(pix1[col] - avg2(pix2[col], pix2[col+1]));
+        pix1 += line_size;
+        pix2 += line_size;
+    }
+    return sad;
+}
+/**
+ * Sum of absolute differences between an 8-wide block (pix1) and a reference
+ * formed as avg2() of each sample of pix2 and the sample one row below it.
+ * Reads h+1 rows of pix2. v is not used.
+ */
+static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int sad = 0;
+    int row, col;
+    for(row=0; row<h; row++) {
+        uint8_t *below = pix2 + line_size;
+        for(col=0; col<8; col++)
+            sad += abs(pix1[col] - avg2(pix2[col], below[col]));
+        pix1 += line_size;
+        pix2 = below;
+    }
+    return sad;
+}
+/**
+ * Sum of absolute differences between an 8-wide block (pix1) and a reference
+ * formed as avg4() of each 2x2 neighbourhood of pix2.
+ * Reads 9 samples per row and h+1 rows of pix2. v is not used.
+ */
+static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int sad = 0;
+    int row, col;
+    for(row=0; row<h; row++) {
+        uint8_t *below = pix2 + line_size;
+        for(col=0; col<8; col++)
+            sad += abs(pix1[col] - avg4(pix2[col], pix2[col+1], below[col], below[col+1]));
+        pix1 += line_size;
+        pix2 = below;
+    }
+    return sad;
+}
+/**
+ * Squared error between two 16-wide blocks plus a weighted penalty term.
+ * The penalty accumulates, over every 2x2 neighbourhood (15 per row, h-1
+ * rows), the difference between the absolute second-order gradient of s1 and
+ * that of s2; its absolute value is multiplied by the context's
+ * avctx->nsse_weight, or by 8 when v is NULL.
+ * @param v MpegEncContext pointer, may be NULL
+ */
+static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
+    MpegEncContext *ctx = v;
+    int sse = 0;
+    int noise = 0;
+    int row, col;
+    for(row=0; row<h; row++){
+        for(col=0; col<16; col++){
+            const int diff = s1[col] - s2[col];
+            sse += diff*diff;
+        }
+        if(row+1 < h){
+            for(col=0; col<15; col++){
+                noise += FFABS(  s1[col  ] - s1[col  +stride]
+                               - s1[col+1] + s1[col+1+stride])
+                        -FFABS(  s2[col  ] - s2[col  +stride]
+                               - s2[col+1] + s2[col+1+stride]);
+            }
+        }
+        s1 += stride;
+        s2 += stride;
+    }
+    if(ctx)
+        return sse + FFABS(noise)*ctx->avctx->nsse_weight;
+    return sse + FFABS(noise)*8;
+}
+/**
+ * Squared error between two 8-wide blocks plus a weighted penalty term.
+ * The penalty accumulates, over every 2x2 neighbourhood (7 per row, h-1
+ * rows), the difference between the absolute second-order gradient of s1 and
+ * that of s2; its absolute value is multiplied by the context's
+ * avctx->nsse_weight, or by 8 when v is NULL.
+ * @param v MpegEncContext pointer, may be NULL
+ */
+static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
+    MpegEncContext *ctx = v;
+    int sse = 0;
+    int noise = 0;
+    int row, col;
+    for(row=0; row<h; row++){
+        for(col=0; col<8; col++){
+            const int diff = s1[col] - s2[col];
+            sse += diff*diff;
+        }
+        if(row+1 < h){
+            for(col=0; col<7; col++){
+                noise += FFABS(  s1[col  ] - s1[col  +stride]
+                               - s1[col+1] + s1[col+1+stride])
+                        -FFABS(  s2[col  ] - s2[col  +stride]
+                               - s2[col+1] + s2[col+1+stride]);
+            }
+        }
+        s1 += stride;
+        s2 += stride;
+    }
+    if(ctx)
+        return sse + FFABS(noise)*ctx->avctx->nsse_weight;
+    return sse + FFABS(noise)*8;
+}
+/**
+ * Scores the remainder that would result from adding scale*basis to rem,
+ * without modifying rem.
+ * Each candidate value is rem[i] plus the rounded, down-shifted basis term,
+ * then shifted right by RECON_SHIFT; the score accumulates
+ * ((weight[i]*value)^2)>>4 in an unsigned sum and returns that sum >> 2.
+ */
+static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
+    const int shift = BASIS_SHIFT - RECON_SHIFT;
+    const int rnd   = 1<<(shift-1);
+    unsigned int score = 0;
+    int k;
+    for(k=0; k<64; k++){
+        const int wgt = weight[k];
+        int val = rem[k] + ((basis[k]*scale + rnd)>>shift);
+        int prod;
+        val >>= RECON_SHIFT;
+        assert(-512<val && val<512);
+        prod = wgt*val;
+        score += (prod*prod)>>4;
+    }
+    return score>>2;
+}
+/**
+ * Adds scale*basis to rem in place; each product is rounded and shifted right
+ * by BASIS_SHIFT - RECON_SHIFT before being added.
+ */
+static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
+    const int shift = BASIS_SHIFT - RECON_SHIFT;
+    const int rnd   = 1<<(shift-1);
+    int k;
+    for(k=0; k<64; k++)
+        rem[k] += (basis[k]*scale + rnd)>>shift;
+}
+/**
+ * Permutes the coefficients of an 8x8 block in place.
+ * Only the coefficients at scantable[0] .. scantable[last] are moved; nothing
+ * is done when last <= 0.
+ * @param block the block which will be permuted according to the given permutation vector
+ * @param permutation the permutation vector: the coefficient at index j moves to permutation[j]
+ * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
+ *                  (inverse) permutated to scantable order!
+ * @param last the last non zero coefficient in scantable order, used to speed the permutation up
+ */
+void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
+{
+    DCTELEM saved[64];
+    int pos;
+    if(last<=0)
+        return;
+    //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations
+    /* first pass: stash and clear every coefficient reachable through the scan */
+    for(pos=0; pos<=last; pos++){
+        const int idx= scantable[pos];
+        saved[idx]= block[idx];
+        block[idx]= 0;
+    }
+    /* second pass: drop each stashed coefficient at its permuted position */
+    for(pos=0; pos<=last; pos++){
+        const int idx= scantable[pos];
+        block[permutation[idx]]= saved[idx];
+    }
+}
+/** Comparison stub: ignores all of its arguments and reports a score of 0. */
+static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
+    (void)s;
+    (void)a;
+    (void)b;
+    (void)stride;
+    (void)h;
+    return 0;
+}
+/**
+ * Fills the six-entry table cmp with comparison functions taken from c.
+ * @param c    DSP context supplying the per-metric function tables
+ * @param cmp  table of 6 function pointers to fill
+ * @param type FF_CMP_* selector; only its low 8 bits are examined
+ *
+ * The table is zeroed first. For an unknown selector an error is logged once
+ * per slot and the slots keep their zeroed value.
+ */
+void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
+    int i;
+    memset(cmp, 0, sizeof(void*)*6);
+    for(i=0; i<6; i++){
+        switch(type&0xFF){
+        case FF_CMP_SAD:
+            cmp[i]= c->sad[i];
+            break;
+        case FF_CMP_SATD:
+            cmp[i]= c->hadamard8_diff[i];
+            break;
+        case FF_CMP_SSE:
+            cmp[i]= c->sse[i];
+            break;
+        case FF_CMP_DCT:
+            cmp[i]= c->dct_sad[i];
+            break;
+        case FF_CMP_DCT264:
+            cmp[i]= c->dct264_sad[i];
+            break;
+        case FF_CMP_DCTMAX:
+            cmp[i]= c->dct_max[i];
+            break;
+        case FF_CMP_PSNR:
+            cmp[i]= c->quant_psnr[i];
+            break;
+        case FF_CMP_BIT:
+            cmp[i]= c->bit[i];
+            break;
+        case FF_CMP_RD:
+            cmp[i]= c->rd[i];
+            break;
+        case FF_CMP_VSAD:
+            cmp[i]= c->vsad[i];
+            break;
+        case FF_CMP_VSSE:
+            cmp[i]= c->vsse[i];
+            break;
+        case FF_CMP_ZERO:
+            /* every slot gets the stub that always returns 0 */
+            cmp[i]= zero_cmp;
+            break;
+        case FF_CMP_NSSE:
+            cmp[i]= c->nsse[i];
+            break;
+#if CONFIG_DWT
+        /* the two wavelet metrics are selectable only when CONFIG_DWT is set */
+        case FF_CMP_W53:
+            cmp[i]= c->w53[i];
+            break;
+        case FF_CMP_W97:
+            cmp[i]= c->w97[i];
+            break;
+#endif
+        default:
+            av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
+        }
+    }
+}
+/** Zero one block of 64 DCT coefficients. */
+static void clear_block_c(DCTELEM *block)
+{
+    const size_t nbytes = 64 * sizeof(DCTELEM);
+
+    memset(block, 0, nbytes);
+}
+/**
+ * Zero six consecutive 64-coefficient blocks; equivalent to
+ * memset(blocks, 0, sizeof(DCTELEM)*6*64).
+ */
+static void clear_blocks_c(DCTELEM *blocks)
+{
+    const size_t nbytes = 6 * 64 * sizeof(DCTELEM);
+
+    memset(blocks, 0, nbytes);
+}
+/**
+ * dst[i] += src[i] for i in [0, w), each byte wrapping modulo 256.
+ * The bulk is processed one long at a time using the pb_7f / pb_80 masks
+ * (defined elsewhere in this file; presumably 0x7f and 0x80 replicated in
+ * every byte) so that carries do not cross byte lanes; the remaining bytes
+ * are handled one by one.
+ */
+static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
+    long i;
+    /* Keep the bound signed: with the unsigned sizeof result, w < sizeof(long)
+     * would wrap to a huge value and the word loop would overrun the buffers. */
+    for(i=0; i<=w-(int)sizeof(long); i+=sizeof(long)){
+        long a = *(long*)(src+i);
+        long b = *(long*)(dst+i);
+        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
+    }
+    for(; i<w; i++)
+        dst[i+0] += src[i+0];
+}
+/**
+ * dst[i] = src1[i] + src2[i] for i in [0, w), each byte wrapping modulo 256.
+ * Uses the same long-at-a-time masking scheme as add_bytes_c, relying on the
+ * pb_7f / pb_80 masks defined elsewhere in this file.
+ */
+static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
+    long i;
+    /* Keep the bound signed: with the unsigned sizeof result, w < sizeof(long)
+     * would wrap to a huge value and the word loop would overrun the buffers. */
+    for(i=0; i<=w-(int)sizeof(long); i+=sizeof(long)){
+        long a = *(long*)(src1+i);
+        long b = *(long*)(src2+i);
+        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
+    }
+    for(; i<w; i++)
+        dst[i] = src1[i]+src2[i];
+}
+/**
+ * dst[i] = src1[i] - src2[i] for i in [0, w), each byte wrapping modulo 256.
+ * When HAVE_FAST_UNALIGNED is false and src2 is not long-aligned, the bulk is
+ * done with an 8x unrolled byte loop; otherwise it is done one long at a time
+ * with the pb_7f / pb_80 masks (defined elsewhere in this file). A final byte
+ * loop finishes whatever is left.
+ */
+static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
+    long i;
+#if !HAVE_FAST_UNALIGNED
+    if((long)src2 & (sizeof(long)-1)){
+        for(i=0; i+7<w; i+=8){
+            dst[i+0] = src1[i+0]-src2[i+0];
+            dst[i+1] = src1[i+1]-src2[i+1];
+            dst[i+2] = src1[i+2]-src2[i+2];
+            dst[i+3] = src1[i+3]-src2[i+3];
+            dst[i+4] = src1[i+4]-src2[i+4];
+            dst[i+5] = src1[i+5]-src2[i+5];
+            dst[i+6] = src1[i+6]-src2[i+6];
+            dst[i+7] = src1[i+7]-src2[i+7];
+        }
+    }else
+#endif
+    /* Keep the bound signed: with the unsigned sizeof result, w < sizeof(long)
+     * would wrap to a huge value and the word loop would overrun the buffers. */
+    for(i=0; i<=w-(int)sizeof(long); i+=sizeof(long)){
+        long a = *(long*)(src1+i);
+        long b = *(long*)(src2+i);
+        *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
+    }
+    for(; i<w; i++)
+        dst[i+0] = src1[i+0]-src2[i+0];
+}
+/**
+ * Reconstruct a row from median-predicted residuals.
+ * For each position the predictor is mid_pred() of the running left value,
+ * the sample in src1 and their gradient (left + src1 - left_top, masked to
+ * 8 bits); diff[i] is added to it and the 8-bit result is stored in dst.
+ * *left and *left_top supply the initial state and receive the final one.
+ */
+static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
+    uint8_t cur_left     = *left;
+    uint8_t cur_left_top = *left_top;
+    int n;
+
+    for (n = 0; n < w; n++) {
+        const int gradient = (cur_left + src1[n] - cur_left_top) & 0xFF;
+
+        cur_left     = mid_pred(cur_left, src1[n], gradient) + diff[n];
+        cur_left_top = src1[n];
+        dst[n]       = cur_left;
+    }
+
+    *left     = cur_left;
+    *left_top = cur_left_top;
+}
+/**
+ * Compute median-prediction residuals for a row.
+ * For each position the predictor is mid_pred() of the running left value,
+ * the sample in src1 and their gradient (left + src1 - left_top, masked to
+ * 8 bits); dst receives src2[i] minus that predictor. The running left value
+ * is then taken from src2 and left_top from src1.
+ * *left and *left_top supply the initial state and receive the final one.
+ */
+static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
+    uint8_t cur_left     = *left;
+    uint8_t cur_left_top = *left_top;
+    int n;
+
+    for (n = 0; n < w; n++) {
+        const int gradient  = (cur_left + src1[n] - cur_left_top) & 0xFF;
+        const int predicted = mid_pred(cur_left, src1[n], gradient);
+
+        cur_left_top = src1[n];
+        cur_left     = src2[n];
+        dst[n]       = cur_left - predicted;
+    }
+
+    *left     = cur_left;
+    *left_top = cur_left_top;
+}
+/**
+ * Running-sum reconstruction: acc accumulates src[0..w-1] and the low 8 bits
+ * of the running total are written to dst at each step.
+ * @return the full (non-truncated) accumulator after the last element
+ */
+static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
+    int remaining;
+
+    for (remaining = w; remaining > 0; remaining--) {
+        acc   += *src++;
+        *dst++ = acc;
+    }
+    return acc;
+}
+/* Byte offsets of the four channels inside one 32-bit pixel; the order is
+ * mirrored on big-endian builds. */
+#if HAVE_BIGENDIAN
+#define B 3
+#define G 2
+#define R 1
+#define A 0
+#else
+#define B 0
+#define G 1
+#define R 2
+#define A 3
+#endif
+/**
+ * Per-channel running-sum reconstruction over w 4-byte pixels.
+ * Each channel keeps its own accumulator, seeded from and written back to
+ * *red, *green, *blue and *alpha; the low 8 bits of each running total are
+ * stored in dst.
+ */
+static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
+    int acc_r = *red;
+    int acc_g = *green;
+    int acc_b = *blue;
+    int acc_a = *alpha;
+    int px;
+
+    for (px = 0; px < w; px++) {
+        const uint8_t *in  = src + 4*px;
+        uint8_t       *out = dst + 4*px;
+
+        /* Read the whole pixel before writing it back. */
+        acc_b += in[B];
+        acc_g += in[G];
+        acc_r += in[R];
+        acc_a += in[A];
+
+        out[B] = acc_b;
+        out[G] = acc_g;
+        out[R] = acc_r;
+        out[A] = acc_a;
+    }
+
+    *red   = acc_r;
+    *green = acc_g;
+    *blue  = acc_b;
+    *alpha = acc_a;
+}
+#undef B
+#undef G
+#undef R
+#undef A
+/* BUTTERFLY2: store the sum and the difference of two inputs into two
+ * separate outputs. Expands to two statements, so it must not be used as the
+ * unbraced body of an if/else/loop. */
+#define BUTTERFLY2(o1,o2,i1,i2) \
+o1= (i1)+(i2);\
+o2= (i1)-(i2);
+/* BUTTERFLY1: in-place variant; x becomes x+y and y becomes x-y, using int
+ * temporaries named a and b inside its own block. */
+#define BUTTERFLY1(x,y) \
+{\
+    int a,b;\
+    a= x;\
+    b= y;\
+    x= a+b;\
+    y= a-b;\
+}
+/* BUTTERFLYA: |x+y| + |x-y| as an expression; the operands are not modified
+ * but each is evaluated more than once. */
+#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
+/**
+ * Score an 8x8 block by transforming the pixel difference src - dst with
+ * three butterfly stages along each row and each column (an 8x8 Hadamard
+ * transform) and summing the absolute values of the result.
+ * @param s      unused
+ * @param dst    first 8x8 block
+ * @param src    second 8x8 block
+ * @param stride distance between consecutive rows of both blocks
+ * @param h      must be 8 (asserted)
+ * @return sum of absolute transformed differences
+ */
+static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
+    int i;
+    int temp[64];
+    int sum=0;
+    assert(h==8);
+    /* Horizontal pass: row i of the difference is transformed into temp[8*i..8*i+7]. */
+    for(i=0; i<8; i++){
+        //FIXME try pointer walks
+        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
+        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
+        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
+        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);
+        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
+        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
+        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
+        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
+        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
+        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
+        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
+        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
+    }
+    /* Vertical pass on column i: two in-place stages, then BUTTERFLYA performs
+     * the last stage and the absolute-value accumulation in one step. */
+    for(i=0; i<8; i++){
+        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
+        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
+        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
+        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
+        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
+        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
+        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
+        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
+        sum +=
+             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
+            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
+            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
+            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
+    }
+    /* Disabled debugging aid: tracks and prints the largest score seen. */
+#if 0
+static int maxi=0;
+if(sum>maxi){
+    maxi=sum;
+    printf("MAX:%d\n", maxi);
+}
+#endif
+    return sum;
+}
+/**
+ * Intra variant of the 8x8 Hadamard score: the transform is applied to the
+ * pixels of src themselves (no reference block), the absolute values of the
+ * result are summed, and the contribution of the first (DC) term is removed.
+ * @param s      unused
+ * @param src    8x8 block to score
+ * @param dummy  unused
+ * @param stride distance between consecutive rows of src
+ * @param h      must be 8 (asserted)
+ */
+static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
+    int i;
+    int temp[64];
+    int sum=0;
+    assert(h==8);
+    /* Horizontal pass: row i of src is transformed into temp[8*i..8*i+7]. */
+    for(i=0; i<8; i++){
+        //FIXME try pointer walks
+        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
+        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
+        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
+        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);
+        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
+        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
+        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
+        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
+        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
+        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
+        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
+        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
+    }
+    /* Vertical pass on column i: two in-place stages, then BUTTERFLYA performs
+     * the last stage and the absolute-value accumulation in one step. */
+    for(i=0; i<8; i++){
+        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
+        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
+        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
+        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
+        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
+        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
+        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
+        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
+        sum +=
+             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
+            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
+            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
+            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
+    }
+    /* Undo the |t0+t4| term that BUTTERFLYA added for column 0. */
+    sum -= FFABS(temp[8*0] + temp[8*4]); // -mean
+    return sum;
+}
+/**
+ * Score an 8x8 block in the transform domain: the pixel difference of src1
+ * and src2 is passed through the context's forward DCT and reduced with the
+ * context's sum_abs_dctelem callback.
+ * @param c MpegEncContext (passed as void*)
+ * @param h must be 8 (asserted)
+ */
+static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
+    MpegEncContext * const s= (MpegEncContext *)c;
+    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
+    assert(h==8);
+    s->dsp.diff_pixels(temp, src1, src2, stride);
+    s->dsp.fdct(temp);
+    return s->dsp.sum_abs_dctelem(temp);
+}
+#if CONFIG_GPL
+/*
+ * One 8-point integer transform pass built only from additions,
+ * subtractions and right shifts. The expansion site must define SRC(x) to
+ * read input element x and DST(x,v) to consume output element x with value v.
+ */
+#define DCT8_1D {\
+    const int s07 = SRC(0) + SRC(7);\
+    const int s16 = SRC(1) + SRC(6);\
+    const int s25 = SRC(2) + SRC(5);\
+    const int s34 = SRC(3) + SRC(4);\
+    const int a0 = s07 + s34;\
+    const int a1 = s16 + s25;\
+    const int a2 = s07 - s34;\
+    const int a3 = s16 - s25;\
+    const int d07 = SRC(0) - SRC(7);\
+    const int d16 = SRC(1) - SRC(6);\
+    const int d25 = SRC(2) - SRC(5);\
+    const int d34 = SRC(3) - SRC(4);\
+    const int a4 = d16 + d25 + (d07 + (d07>>1));\
+    const int a5 = d07 - d34 - (d25 + (d25>>1));\
+    const int a6 = d07 + d34 - (d16 + (d16>>1));\
+    const int a7 = d16 - d25 + (d34 + (d34>>1));\
+    DST(0,  a0 + a1     ) ;\
+    DST(1,  a4 + (a7>>2)) ;\
+    DST(2,  a2 + (a3>>1)) ;\
+    DST(3,  a5 + (a6>>2)) ;\
+    DST(4,  a0 - a1     ) ;\
+    DST(5,  a6 - (a5>>2)) ;\
+    DST(6, (a2>>1) - a3 ) ;\
+    DST(7, (a4>>2) - a7 ) ;\
+}
+/**
+ * Score an 8x8 block by running DCT8_1D over the pixel difference of src1
+ * and src2 along both dimensions and summing the absolute values of the
+ * final outputs. Unlike its siblings, h is neither used nor asserted here.
+ * @param c MpegEncContext (passed as void*)
+ */
+static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
+    MpegEncContext * const s= (MpegEncContext *)c;
+    DCTELEM dct[8][8];
+    int i;
+    int sum=0;
+    s->dsp.diff_pixels(dct[0], src1, src2, stride);
+    /* First pass: transform dct[i][0..7] in place. */
+#define SRC(x) dct[i][x]
+#define DST(x,v) dct[i][x]= v
+    for( i = 0; i < 8; i++ )
+        DCT8_1D
+#undef SRC
+#undef DST
+    /* Second pass: transform dct[0..7][i]; outputs are not stored, only their
+     * absolute values are accumulated. */
+#define SRC(x) dct[x][i]
+#define DST(x,v) sum += FFABS(v)
+    for( i = 0; i < 8; i++ )
+        DCT8_1D
+#undef SRC
+#undef DST
+    return sum;
+}
+#endif
+/**
+ * Return the largest absolute DCT coefficient of the 8x8 pixel difference
+ * between src1 and src2, using the context's diff_pixels and fdct callbacks.
+ * @param c MpegEncContext (passed as void*)
+ * @param h must be 8 (asserted)
+ */
+static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
+    MpegEncContext * const s= (MpegEncContext *)c;
+    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
+    int peak = 0;
+    int k;
+
+    assert(h==8);
+
+    s->dsp.diff_pixels(temp, src1, src2, stride);
+    s->dsp.fdct(temp);
+
+    for (k = 0; k < 64; k++) {
+        const int magnitude = FFABS(temp[k]);
+        if (magnitude > peak)
+            peak = magnitude;
+    }
+    return peak;
+}
+/**
+ * Measure the squared error introduced by quantization on an 8x8 block:
+ * the pixel difference of src1 and src2 is quantized, dequantized with the
+ * inter dequantizer, passed through ff_simple_idct and compared against a
+ * saved copy of the original difference.
+ * Side effects: forces s->mb_intra to 0 and overwrites s->block_last_index[0].
+ * @param c MpegEncContext (passed as void*)
+ * @param h must be 8 (asserted)
+ * @return sum of squared differences between the round-tripped and original data
+ */
+static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
+    MpegEncContext * const s= (MpegEncContext *)c;
+    LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
+    /* Second half of the buffer keeps the untouched difference. */
+    DCTELEM * const bak = temp+64;
+    int sum=0, i;
+    assert(h==8);
+    s->mb_intra=0;
+    s->dsp.diff_pixels(temp, src1, src2, stride);
+    memcpy(bak, temp, 64*sizeof(DCTELEM));
+    /* NOTE(review): no explicit forward DCT is called here; presumably
+     * fast_dct_quantize transforms as well as quantizes - confirm. */
+    s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
+    s->dct_unquantize_inter(s, temp, 0, s->qscale);
+    ff_simple_idct(temp); //FIXME
+    for(i=0; i<64; i++)
+        sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
+    return sum;
+}
+/**
+ * Rate-distortion style score for one 8x8 block. The difference of src1 and
+ * src2 is quantized, the bits needed for the quantized coefficients are
+ * estimated from the context's VLC length tables, the block is reconstructed
+ * on top of a local copy of src2, and the squared error against src1 is
+ * combined with the bit estimate weighted by qscale*qscale.
+ * Side effect: overwrites s->block_last_index[0].
+ * @param c MpegEncContext (passed as void*)
+ * @param h must be 8 (asserted)
+ */
+static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
+    MpegEncContext * const s= (MpegEncContext *)c;
+    const uint8_t *scantable= s->intra_scantable.permutated;
+    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
+    LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
+    LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
+    int i, last, run, bits, level, distortion, start_i;
+    const int esc_length= s->ac_esc_length;
+    uint8_t * length;
+    uint8_t * last_length;
+    assert(h==8);
+    /* Work on packed local copies (row stride 8) so the inputs are not modified. */
+    copy_block8(lsrc1, src1, 8, stride, 8);
+    copy_block8(lsrc2, src2, 8, stride, 8);
+    s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);
+    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
+    bits=0;
+    /* Intra blocks cost the DC coefficient separately and start the AC scan at 1. */
+    if (s->mb_intra) {
+        start_i = 1;
+        length     = s->intra_ac_vlc_length;
+        last_length= s->intra_ac_vlc_last_length;
+        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
+    } else {
+        start_i = 0;
+        length     = s->inter_ac_vlc_length;
+        last_length= s->inter_ac_vlc_last_length;
+    }
+    if(last>=start_i){
+        run=0;
+        /* Every non-zero coefficient before the last one costs a (run, level)
+         * code, or the escape length when the level is outside -64..63. */
+        for(i=start_i; i<last; i++){
+            int j= scantable[i];
+            level= temp[j];
+            if(level){
+                level+=64;
+                if((level&(~127)) == 0){
+                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
+                }else
+                    bits+= esc_length;
+                run=0;
+            }else
+                run++;
+        }
+        /* The final coefficient is costed with the separate "last" table. */
+        i= scantable[last];
+        level= temp[i] + 64;
+        assert(level - 64);
+        if((level&(~127)) == 0){
+            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
+        }else
+            bits+= esc_length;
+    }
+    /* Dequantize only when the quantizer reported at least one coefficient. */
+    if(last>=0){
+        if(s->mb_intra)
+            s->dct_unquantize_intra(s, temp, 0, s->qscale);
+        else
+            s->dct_unquantize_inter(s, temp, 0, s->qscale);
+    }
+    s->dsp.idct_add(lsrc2, 8, temp);
+    distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);
+    return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
+}
+/**
+ * Estimate the number of bits needed to code one 8x8 block: the difference
+ * of src1 and src2 is quantized and the cost of the resulting coefficients
+ * is looked up in the context's VLC length tables.
+ * Side effect: overwrites s->block_last_index[0].
+ * @param c MpegEncContext (passed as void*)
+ * @param h must be 8 (asserted)
+ * @return estimated bit count
+ */
+static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
+    MpegEncContext * const s= (MpegEncContext *)c;
+    const uint8_t *scantable= s->intra_scantable.permutated;
+    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
+    int i, last, run, bits, level, start_i;
+    const int esc_length= s->ac_esc_length;
+    uint8_t * length;
+    uint8_t * last_length;
+    assert(h==8);
+    s->dsp.diff_pixels(temp, src1, src2, stride);
+    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
+    bits=0;
+    /* Intra blocks cost the DC coefficient separately and start the AC scan at 1. */
+    if (s->mb_intra) {
+        start_i = 1;
+        length     = s->intra_ac_vlc_length;
+        last_length= s->intra_ac_vlc_last_length;
+        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
+    } else {
+        start_i = 0;
+        length     = s->inter_ac_vlc_length;
+        last_length= s->inter_ac_vlc_last_length;
+    }
+    if(last>=start_i){
+        run=0;
+        /* Every non-zero coefficient before the last one costs a (run, level)
+         * code, or the escape length when the level is outside -64..63. */
+        for(i=start_i; i<last; i++){
+            int j= scantable[i];
+            level= temp[j];
+            if(level){
+                level+=64;
+                if((level&(~127)) == 0){
+                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
+                }else
+                    bits+= esc_length;
+                run=0;
+            }else
+                run++;
+        }
+        /* The final coefficient is costed with the separate "last" table. */
+        i= scantable[last];
+        level= temp[i] + 64;
+        assert(level - 64);
+        if((level&(~127)) == 0){
+            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
+        }else
+            bits+= esc_length;
+    }
+    return bits;
+}
+/*
+ * Generates vsad_intra<size>_c(): the sum of absolute differences between
+ * each pixel and the pixel one row below it, over a block <size> pixels wide
+ * and h rows tall (h-1 row pairs). The c and dummy arguments are unused.
+ * The inner loop handles four columns per iteration, so <size> is expected
+ * to be a multiple of 4.
+ */
+#define VSAD_INTRA(size) \
+static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
+    int score=0;                                                                                            \
+    int x,y;                                                                                                \
+                                                                                                            \
+    for(y=1; y<h; y++){                                                                                     \
+        for(x=0; x<size; x+=4){                                                                             \
+            score+= FFABS(s[x  ] - s[x  +stride]) + FFABS(s[x+1] - s[x+1+stride])                           \
+                   +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]);                          \
+        }                                                                                                   \
+        s+= stride;                                                                                         \
+    }                                                                                                       \
+                                                                                                            \
+    return score;                                                                                           \
+}
+VSAD_INTRA(8)
+VSAD_INTRA(16)
+/**
+ * Vertical SAD of the difference signal: for a 16-pixel-wide block, sums
+ * |(s1 - s2) at row y  -  (s1 - s2) at row y+1| over h-1 row pairs.
+ * The c argument is unused.
+ */
+static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
+    int total = 0;
+    int row, col;
+
+    for (row = 1; row < h; row++) {
+        uint8_t *below1 = s1 + stride;
+        uint8_t *below2 = s2 + stride;
+
+        for (col = 0; col < 16; col++)
+            total += FFABS(s1[col] - s2[col] - below1[col] + below2[col]);
+
+        s1 = below1;
+        s2 = below2;
+    }
+    return total;
+}
+/* Square of the argument; the argument is evaluated twice. */
+#define SQ(a) ((a)*(a))
+/*
+ * Generates vsse_intra<size>_c(): the sum of squared differences between
+ * each pixel and the pixel one row below it, over a block <size> pixels wide
+ * and h rows tall (h-1 row pairs). The c and dummy arguments are unused.
+ * The inner loop handles four columns per iteration, so <size> is expected
+ * to be a multiple of 4.
+ */
+#define VSSE_INTRA(size) \
+static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
+    int score=0;                                                                                            \
+    int x,y;                                                                                                \
+                                                                                                            \
+    for(y=1; y<h; y++){                                                                                     \
+        for(x=0; x<size; x+=4){                                                                               \
+            score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride])                                 \
+                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);                                \
+        }                                                                                                   \
+        s+= stride;                                                                                         \
+    }                                                                                                       \
+                                                                                                            \
+    return score;                                                                                           \
+}
+VSSE_INTRA(8)
+VSSE_INTRA(16)
+/**
+ * Vertical SSE of the difference signal: for a 16-pixel-wide block, sums
+ * the square of ((s1 - s2) at row y  -  (s1 - s2) at row y+1) over h-1 row
+ * pairs. The c argument is unused.
+ */
+static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
+    int total = 0;
+    int row, col;
+
+    for (row = 1; row < h; row++) {
+        uint8_t *below1 = s1 + stride;
+        uint8_t *below2 = s2 + stride;
+
+        for (col = 0; col < 16; col++)
+            total += SQ(s1[col] - s2[col] - below1[col] + below2[col]);
+
+        s1 = below1;
+        s2 = below2;
+    }
+    return total;
+}
+/**
+ * Sum of squared differences between an int8_t array and an int16_t array
+ * of the same length.
+ * @param size number of elements compared
+ */
+static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
+                               int size){
+    int total = 0;
+    int k;
+
+    for (k = 0; k < size; k++) {
+        const int delta = pix1[k] - pix2[k];
+        total += delta * delta;
+    }
+    return total;
+}
+/* Instantiate the 16-pixel variants of the 8x8 metrics above.
+ * NOTE(review): WRAPPER8_16_SQ is defined outside this part of the file;
+ * presumably it builds each 16x16 function from calls to the named 8x8
+ * function - confirm against the macro definition. */
+WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
+WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
+WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
+#if CONFIG_GPL
+WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
+#endif
+WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
+WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
+WRAPPER8_16_SQ(rd8x8_c, rd16_c)
+WRAPPER8_16_SQ(bit8x8_c, bit16_c)
+#endif /* 0 */
+/** In-place element-wise product: dst[k] = dst[k] * src[k] for k in [0, len). */
+static void vector_fmul_c(float *dst, const float *src, int len){
+    int k;
+
+    for (k = 0; k < len; k++) {
+        const float factor = src[k];
+        dst[k] = dst[k] * factor;
+    }
+}
+/**
+ * Element-wise product with the second operand read back to front:
+ * dst[k] = src0[k] * src1[len-1-k] for k in [0, len).
+ */
+static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
+    const float *tail = src1 + len - 1;
+    int k;
+
+    for (k = 0; k < len; k++)
+        dst[k] = src0[k] * tail[-k];
+}
+/** Multiply-add: dst[k] = src0[k] * src1[k] + src2[k] for k in [0, len). */
+static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len){
+    int k;
+
+    for (k = 0; k < len; k++) {
+        dst[k] = src0[k] * src1[k] + src2[k];
+    }
+}
+/**
+ * Windowed overlap of two len-sample inputs into 2*len outputs.
+ * Working from the outer ends of dst towards the middle, each step pairs
+ * src0[k] with src1[len-1-k] and win[k] with win[2*len-1-k]:
+ *   dst[k]         = s0*wj - s1*wi + add_bias
+ *   dst[2*len-1-k] = s0*wi + s1*wj + add_bias
+ * win must therefore hold 2*len coefficients.
+ */
+void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len){
+    int k;
+
+    for (k = 0; k < len; k++) {
+        const int   lo = k;
+        const int   hi = 2*len - 1 - k;
+        const float s0 = src0[k];
+        const float s1 = src1[len - 1 - k];
+        const float wi = win[lo];
+        const float wj = win[hi];
+
+        dst[lo] = s0*wj - s1*wi + add_bias;
+        dst[hi] = s0*wi + s1*wj + add_bias;
+    }
+}
+/** Scale a vector: dst[k] = src[k] * mul for k in [0, len). */
+static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
+                                 int len)
+{
+    int k = 0;
+
+    while (k < len) {
+        dst[k] = src[k] * mul;
+        k++;
+    }
+}
+#if 0
+/**
+ * Multiply src by a sequence of 2-element sub-vectors and a scalar:
+ * for every pair of outputs, dst = src * (*sv)[0..1] * mul, then sv advances
+ * to the next sub-vector pointer. len is consumed two elements at a time.
+ */
+static void vector_fmul_sv_scalar_2_c(float *dst, const float *src,
+                                      const float **sv, float mul, int len)
+{
+    int base, k;
+
+    for (base = 0; base < len; base += 2, sv++) {
+        const float *vec = sv[0];
+        for (k = 0; k < 2; k++)
+            dst[base + k] = src[base + k] * vec[k] * mul;
+    }
+}
+/**
+ * Multiply src by a sequence of 4-element sub-vectors and a scalar:
+ * for every group of four outputs, dst = src * (*sv)[0..3] * mul, then sv
+ * advances to the next sub-vector pointer. len is consumed four at a time.
+ */
+static void vector_fmul_sv_scalar_4_c(float *dst, const float *src,
+                                      const float **sv, float mul, int len)
+{
+    int base, k;
+
+    for (base = 0; base < len; base += 4, sv++) {
+        const float *vec = sv[0];
+        for (k = 0; k < 4; k++)
+            dst[base + k] = src[base + k] * vec[k] * mul;
+    }
+}
+/**
+ * Expand a sequence of 2-element sub-vectors scaled by mul into dst:
+ * each pair of outputs is (*sv)[0..1] * mul, then sv advances.
+ */
+static void sv_fmul_scalar_2_c(float *dst, const float **sv, float mul,
+                               int len)
+{
+    int base, k;
+
+    for (base = 0; base < len; base += 2, sv++) {
+        const float *vec = sv[0];
+        for (k = 0; k < 2; k++)
+            dst[base + k] = vec[k] * mul;
+    }
+}
+/**
+ * Expand a sequence of 4-element sub-vectors scaled by mul into dst:
+ * each group of four outputs is (*sv)[0..3] * mul, then sv advances.
+ */
+static void sv_fmul_scalar_4_c(float *dst, const float **sv, float mul,
+                               int len)
+{
+    int base, k;
+
+    for (base = 0; base < len; base += 4, sv++) {
+        const float *vec = sv[0];
+        for (k = 0; k < 4; k++)
+            dst[base + k] = vec[k] * mul;
+    }
+}
+/**
+ * In-place butterfly on two vectors: afterwards v1[k] holds the sum and
+ * v2[k] the difference (old v1[k] - old v2[k]) of the original elements.
+ */
+static void butterflies_float_c(float *restrict v1, float *restrict v2,
+                                int len)
+{
+    int k;
+
+    for (k = 0; k < len; k++) {
+        const float a = v1[k];
+        const float b = v2[k];
+
+        v1[k] = a + b;
+        v2[k] = a - b;
+    }
+}
+/** Dot product of two float vectors, accumulated in a float from index 0 upward. */
+static float scalarproduct_float_c(const float *v1, const float *v2, int len)
+{
+    float acc = 0.0;
+    int k = 0;
+
+    while (k < len) {
+        acc += v1[k] * v2[k];
+        k++;
+    }
+    return acc;
+}
+/** Convert integers to float and scale them: dst[k] = src[k] * mul. */
+static void int32_to_float_fmul_scalar_c(float *dst, const int *src, float mul, int len){
+    int k;
+
+    for (k = 0; k < len; k++) {
+        const int sample = src[k];
+        dst[k] = sample * mul;
+    }
+}
+/**
+ * Clip one value given as a raw 32-bit pattern using only unsigned integer
+ * comparisons. As used by vector_clipf_c_opposite_sign, the arguments are
+ * the bit patterns of floats with a negative lower and a positive upper bound.
+ * @param a        pattern to clip
+ * @param mini     pattern of the lower bound
+ * @param maxi     pattern of the upper bound
+ * @param maxisign maxi with its top bit flipped
+ * @return mini when a compares above mini, maxi when a with its top bit
+ *         flipped compares above maxisign, otherwise a unchanged
+ */
+static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
+                   uint32_t maxi, uint32_t maxisign)
+{
+    if(a > mini) return mini;
+    /* 1U, not 1: shifting a signed 1 into bit 31 overflows int (undefined). */
+    else if((a^(1U<<31)) > maxisign) return maxi;
+    else return a;
+}
+/**
+ * Clip src into [*min, *max] and store the result in dst, working on the
+ * raw 32-bit patterns of the floats via clipf_c_one(). The only caller in
+ * this file uses it when min is negative and max is positive.
+ * Elements are processed in groups of 8, so len is expected to be a
+ * multiple of 8.
+ * NOTE(review): the float buffers are accessed through uint32_t pointers,
+ * which relies on the compiler tolerating this kind of type punning.
+ */
+static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
+    int i;
+    uint32_t mini = *(uint32_t*)min;
+    uint32_t maxi = *(uint32_t*)max;
+    /* 1U, not 1: shifting a signed 1 into bit 31 overflows int (undefined). */
+    uint32_t maxisign = maxi ^ (1U<<31);
+    uint32_t *dsti = (uint32_t*)dst;
+    const uint32_t *srci = (const uint32_t*)src;
+    for(i=0; i<len; i+=8) {
+        dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign);
+        dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign);
+        dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign);
+        dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign);
+        dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign);
+        dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign);
+        dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign);
+        dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign);
+    }
+}
+/**
+ * Clip every element of src into [min, max] and store it in dst.
+ * When the bounds straddle zero the integer-compare routine
+ * vector_clipf_c_opposite_sign() is used; otherwise av_clipf() is applied
+ * per element. Elements are processed in groups of 8, so len is expected to
+ * be a multiple of 8.
+ */
+static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
+    int base, k;
+
+    if (min < 0 && max > 0) {
+        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
+        return;
+    }
+
+    for (base = 0; base < len; base += 8) {
+        for (k = 0; k < 8; k++)
+            dst[base + k] = av_clipf(src[base + k], min, max);
+    }
+}
+/**
+ * Convert one float to a 16-bit sample by operating on its bit pattern.
+ * The float is reinterpreted as a 32-bit integer; if any of bits 16..19 is
+ * set the value is replaced by 0 or -1 depending on whether it is at most
+ * or above 0x43c0ffff, and 0x8000 is subtracted before returning.
+ * Callers store the result into an int16_t, keeping only the low 16 bits.
+ * NOTE(review): this presumably expects input already biased so that valid
+ * samples have the pattern 0x43c0xxxx (i.e. floats in [384.0, 386.0)) -
+ * confirm against the code that prepares the buffer.
+ */
+static av_always_inline int float_to_int16_one(const float *src){
+    int_fast32_t tmp = *(const int32_t*)src;
+    /* Out-of-range patterns saturate: 0 ends up as -0x8000, -1 as 0x7fff
+     * once truncated to 16 bits. */
+    if(tmp & 0xf0000){
+        tmp = (0x43c0ffff - tmp)>>31;
+        // is this faster on some gcc/cpu combinations?
+//      if(tmp > 0x43c0ffff) tmp = 0xFFFF;
+//      else                 tmp = 0;
+    }
+    return tmp - 0x8000;
+}
+/** Convert len floats to 16-bit samples with float_to_int16_one(). */
+void ff_float_to_int16_c(int16_t *dst, const float *src, long len){
+    int n;
+
+    for (n = 0; n < len; n++) {
+        const float *sample = src + n;
+        dst[n] = float_to_int16_one(sample);
+    }
+}
+/**
+ * Convert planar float channels to interleaved 16-bit samples.
+ * @param dst      output, len*channels samples, channel-interleaved
+ * @param src      array of per-channel input pointers, len floats each
+ * @param len      samples per channel
+ * @param channels number of channels; stereo has a dedicated loop
+ */
+void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, int channels){
+    int n, out, ch;
+
+    if (channels == 2) {
+        const float *left  = src[0];
+        const float *right = src[1];
+
+        for (n = 0; n < len; n++) {
+            dst[2*n]   = float_to_int16_one(left + n);
+            dst[2*n+1] = float_to_int16_one(right + n);
+        }
+        return;
+    }
+
+    for (ch = 0; ch < channels; ch++) {
+        out = ch;
+        for (n = 0; n < len; n++) {
+            dst[out] = float_to_int16_one(src[ch] + n);
+            out += channels;
+        }
+    }
+}
+/**
+ * Dot product of two int16_t vectors; each individual product is shifted
+ * right by shift before being accumulated.
+ * @param order number of elements
+ */
+static int32_t scalarproduct_int16_c(int16_t * v1, int16_t * v2, int order, int shift)
+{
+    int acc = 0;
+
+    for (; order != 0; order--) {
+        const int product = *v1++ * *v2++;
+        acc += product >> shift;
+    }
+    return acc;
+}
+/**
+ * Compute the dot product of v1 and v2 while updating v1 in place with
+ * v1[k] += mul * v3[k]. The product uses the value of v1[k] from before
+ * the update.
+ * @param order number of elements
+ * @return dot product of the original v1 with v2
+ */
+static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, int16_t *v2, int16_t *v3, int order, int mul)
+{
+    int acc = 0;
+
+    for (; order != 0; order--) {
+        acc += *v1 * *v2;
+        *v1 += mul * *v3;
+        v1++;
+        v2++;
+        v3++;
+    }
+    return acc;
+}
+/* Fixed-point cosine factors for the WMV2 IDCT, scaled by 2048. */
+#define W0 2048
+#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
+#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
+#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
+#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
+#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
+#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
+#define W7 565  /* 2048*sqrt (2)*cos (7*pi/16) */
+/**
+ * Row pass of the WMV2 inverse DCT: transforms the 8 consecutive
+ * coefficients b[0..7] in place. Results are rounded and scaled down by
+ * 8 bits.
+ */
+static void wmv2_idct_row(short * b)
+{
+    int s1,s2;
+    int a0,a1,a2,a3,a4,a5,a6,a7;
+    /*step 1*/
+    a1 = W1*b[1]+W7*b[7];
+    a7 = W7*b[1]-W1*b[7];
+    a5 = W5*b[5]+W3*b[3];
+    a3 = W3*b[5]-W5*b[3];
+    a2 = W2*b[2]+W6*b[6];
+    a6 = W6*b[2]-W2*b[6];
+    a0 = W0*b[0]+W0*b[4];
+    a4 = W0*b[0]-W0*b[4];
+    /*step 2*/
+    /* 181/256 is approximately 1/sqrt(2) */
+    s1 = (181*(a1-a5+a7-a3)+128)>>8;//1,3,5,7,
+    s2 = (181*(a1-a5-a7+a3)+128)>>8;
+    /*step 3*/
+    b[0] = (a0+a2+a1+a5 + (1<<7))>>8;
+    b[1] = (a4+a6 +s1   + (1<<7))>>8;
+    b[2] = (a4-a6 +s2   + (1<<7))>>8;
+    b[3] = (a0-a2+a7+a3 + (1<<7))>>8;
+    b[4] = (a0-a2-a7-a3 + (1<<7))>>8;
+    b[5] = (a4-a6 -s2   + (1<<7))>>8;
+    b[6] = (a4+a6 -s1   + (1<<7))>>8;
+    b[7] = (a0+a2-a1-a5 + (1<<7))>>8;
+}
+/**
+ * Column pass of the WMV2 inverse DCT: transforms the 8 coefficients
+ * b[0], b[8], ..., b[56] (one column of an 8x8 block stored row by row) in
+ * place. Intermediate products are scaled down by 3 bits first and the
+ * results are rounded and scaled down by a further 14 bits.
+ */
+static void wmv2_idct_col(short * b)
+{
+    int s1,s2;
+    int a0,a1,a2,a3,a4,a5,a6,a7;
+    /*step 1, with extended precision*/
+    a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
+    a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
+    a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
+    a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
+    a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
+    a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
+    /* Unlike the terms above, these two are shifted without the +4 rounding. */
+    a0 = (W0*b[8*0]+W0*b[8*4]    )>>3;
+    a4 = (W0*b[8*0]-W0*b[8*4]    )>>3;
+    /*step 2*/
+    /* 181/256 is approximately 1/sqrt(2) */
+    s1 = (181*(a1-a5+a7-a3)+128)>>8;
+    s2 = (181*(a1-a5-a7+a3)+128)>>8;
+    /*step 3*/
+    b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
+    b[8*1] = (a4+a6 +s1   + (1<<13))>>14;
+    b[8*2] = (a4-a6 +s2   + (1<<13))>>14;
+    b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;
+    b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
+    b[8*5] = (a4-a6 -s2   + (1<<13))>>14;
+    b[8*6] = (a4+a6 -s1   + (1<<13))>>14;
+    b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
+}
+/**
+ * In-place WMV2 inverse DCT of one 8x8 block of 64 shorts stored row by
+ * row: all eight rows are transformed first, then all eight columns.
+ */
+void ff_wmv2_idct_c(short * block){
+    int row, col;
+
+    for (row = 0; row < 8; row++)
+        wmv2_idct_row(block + 8*row);
+
+    for (col = 0; col < 8; col++)
+        wmv2_idct_col(block + col);
+}
+/* XXX: those functions should be suppressed ASAP when all IDCTs are
+ converted */
+/**
+ * Run ff_wmv2_idct_c() on block, then hand the block to
+ * put_pixels_clamped_c() together with dest and line_size.
+ */
+static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    ff_wmv2_idct_c(block);
+    put_pixels_clamped_c(block, dest, line_size);
+}
+/**
+ * Run ff_wmv2_idct_c() on block, then hand the block to
+ * add_pixels_clamped_c() together with dest and line_size.
+ */
+static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    ff_wmv2_idct_c(block);
+    add_pixels_clamped_c(block, dest, line_size);
+}
+/** Pass block through j_rev_dct(), then to put_pixels_clamped_c() with dest and line_size. */
+static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    j_rev_dct (block);
+    put_pixels_clamped_c(block, dest, line_size);
+}
+/** Pass block through j_rev_dct(), then to add_pixels_clamped_c() with dest and line_size. */
+static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    j_rev_dct (block);
+    add_pixels_clamped_c(block, dest, line_size);
+}
+/** Pass block through j_rev_dct4(), then to put_pixels_clamped4_c() with dest and line_size. */
+static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    j_rev_dct4 (block);
+    put_pixels_clamped4_c(block, dest, line_size);
+}
+/** Pass block through j_rev_dct4(), then to add_pixels_clamped4_c() with dest and line_size. */
+static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    j_rev_dct4 (block);
+    add_pixels_clamped4_c(block, dest, line_size);
+}
+/** Pass block through j_rev_dct2(), then to put_pixels_clamped2_c() with dest and line_size. */
+static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    j_rev_dct2 (block);
+    put_pixels_clamped2_c(block, dest, line_size);
+}
+/** Pass block through j_rev_dct2(), then to add_pixels_clamped2_c() with dest and line_size. */
+static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    j_rev_dct2 (block);
+    add_pixels_clamped2_c(block, dest, line_size);
+}
+/**
+ * Single-pixel "IDCT": writes the first coefficient, rounded and divided by
+ * 8, to dest[0] after clamping it through ff_cropTbl. line_size is unused.
+ */
+static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    uint8_t *clamp = ff_cropTbl + MAX_NEG_CROP;
+    const int dc = (block[0] + 4) >> 3;
+
+    dest[0] = clamp[dc];
+}
+/**
+ * Single-pixel "IDCT" with accumulation: adds the first coefficient,
+ * rounded and divided by 8, to dest[0] and clamps the sum through
+ * ff_cropTbl. line_size is unused.
+ */
+static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    uint8_t *clamp = ff_cropTbl + MAX_NEG_CROP;
+    const int dc = (block[0] + 4) >> 3;
+
+    dest[0] = clamp[dest[0] + dc];
+}
+/* No-op placeholder: accepts (mem, stride, h) and does nothing. */
+static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
+/*
+ * Initialise the static lookup tables:
+ *  - ff_cropTbl: identity for 0..255, with MAX_NEG_CROP entries of 0 below
+ *    and MAX_NEG_CROP entries of 255 above;
+ *  - ff_squareTbl: (n - 256)^2 for n in 0..511;
+ *  - inv_zigzag_direct16: 1-based inverse of ff_zigzag_direct.
+ */
+av_cold void dsputil_static_init(void)
+{
+    int n;
+
+    for (n = 0; n < MAX_NEG_CROP; n++) {
+        ff_cropTbl[n] = 0;
+        ff_cropTbl[MAX_NEG_CROP + 256 + n] = 255;
+    }
+    for (n = 0; n < 256; n++)
+        ff_cropTbl[MAX_NEG_CROP + n] = n;
+
+    for (n = 0; n < 512; n++) {
+        const int centered = n - 256;
+        ff_squareTbl[n] = centered * centered;
+    }
+
+    for (n = 0; n < 64; n++)
+        inv_zigzag_direct16[ff_zigzag_direct[n]] = n + 1;
+}
+/**
+ * Check whether the compiler honours 16-byte alignment for stack variables.
+ * A local declared with DECLARE_ALIGNED(16, ...) is tested; if its address
+ * is not a multiple of 16 the function returns -1, and on the first such
+ * failure (on builds with HAVE_MMX or HAVE_ALTIVEC) an error is logged.
+ * @return 0 when the variable is aligned, -1 otherwise
+ */
+int ff_check_alignment(void){
+    /* Ensures the warning is printed at most once per process. */
+    static int did_fail=0;
+    DECLARE_ALIGNED(16, int, aligned);
+    if((intptr_t)&aligned & 15){
+        if(!did_fail){
+#if HAVE_MMX || HAVE_ALTIVEC
+            av_log(NULL, AV_LOG_ERROR,
+                "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
+                "and may be very slow or crash. This is not a bug in libavcodec,\n"
+                "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
+                "Do not report crashes to FFmpeg developers.\n");
+#endif
+            did_fail=1;
+        }
+        return -1;
+    }
+    return 0;
+}
+#endif
+av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx) /* fills function pointers in *c; most of the body is compiled out with #if 0 */
+{ /* avctx is referenced only inside the #if 0 regions, so the compiled code never reads it */
+#if 0 /* first disabled region: runs to the "#endif" tagged 0 just before the vector_fmul assignments */
+    int i; /* only used by the loops in the second #if 0 region */
+    ff_check_alignment();
+#if CONFIG_ENCODERS
+    if(avctx->dct_algo==FF_DCT_FASTINT) {
+        c->fdct = fdct_ifast;
+        c->fdct248 = fdct_ifast248;
+    }
+    else if(avctx->dct_algo==FF_DCT_FAAN) {
+        c->fdct = ff_faandct;
+        c->fdct248 = ff_faandct248;
+    }
+    else {
+        c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
+        c->fdct248 = ff_fdct248_islow;
+    }
+#endif //CONFIG_ENCODERS
+    if(avctx->lowres==1){ /* lowres 1/2/3 pick the idct4/idct2/idct1 variants; any other value uses idct_algo below */
+        if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO || !CONFIG_H264_DECODER){
+            c->idct_put= ff_jref_idct4_put;
+            c->idct_add= ff_jref_idct4_add;
+        }else{
+            c->idct_put= ff_h264_lowres_idct_put_c;
+            c->idct_add= ff_h264_lowres_idct_add_c;
+        }
+        c->idct    = j_rev_dct4;
+        c->idct_permutation_type= FF_NO_IDCT_PERM;
+    }else if(avctx->lowres==2){
+        c->idct_put= ff_jref_idct2_put;
+        c->idct_add= ff_jref_idct2_add;
+        c->idct    = j_rev_dct2;
+        c->idct_permutation_type= FF_NO_IDCT_PERM;
+    }else if(avctx->lowres==3){
+        c->idct_put= ff_jref_idct1_put;
+        c->idct_add= ff_jref_idct1_add;
+        c->idct    = j_rev_dct1;
+        c->idct_permutation_type= FF_NO_IDCT_PERM;
+    }else{
+        if(avctx->idct_algo==FF_IDCT_INT){
+            c->idct_put= ff_jref_idct_put;
+            c->idct_add= ff_jref_idct_add;
+            c->idct    = j_rev_dct;
+            c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; /* the only branch in this chain that selects a non-identity permutation */
+        }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) &&
+                avctx->idct_algo==FF_IDCT_VP3){
+            c->idct_put= ff_vp3_idct_put_c;
+            c->idct_add= ff_vp3_idct_add_c;
+            c->idct    = ff_vp3_idct_c;
+            c->idct_permutation_type= FF_NO_IDCT_PERM;
+        }else if(avctx->idct_algo==FF_IDCT_WMV2){
+            c->idct_put= ff_wmv2_idct_put_c;
+            c->idct_add= ff_wmv2_idct_add_c;
+            c->idct    = ff_wmv2_idct_c;
+            c->idct_permutation_type= FF_NO_IDCT_PERM;
+        }else if(avctx->idct_algo==FF_IDCT_FAAN){
+            c->idct_put= ff_faanidct_put;
+            c->idct_add= ff_faanidct_add;
+            c->idct    = ff_faanidct;
+            c->idct_permutation_type= FF_NO_IDCT_PERM;
+        }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) {
+            c->idct_put= ff_ea_idct_put_c; /* this branch assigns neither idct nor idct_add */
+            c->idct_permutation_type= FF_NO_IDCT_PERM;
+        }else if(CONFIG_BINK_DECODER && avctx->idct_algo==FF_IDCT_BINK) {
+            c->idct     = ff_bink_idct_c;
+            c->idct_add = ff_bink_idct_add_c;
+            c->idct_put = ff_bink_idct_put_c;
+            c->idct_permutation_type = FF_NO_IDCT_PERM;
+        }else{ //accurate/default
+            c->idct_put= ff_simple_idct_put;
+            c->idct_add= ff_simple_idct_add;
+            c->idct    = ff_simple_idct;
+            c->idct_permutation_type= FF_NO_IDCT_PERM;
+        }
+    }
+    c->get_pixels = get_pixels_c;
+    c->diff_pixels = diff_pixels_c;
+    c->put_pixels_clamped = put_pixels_clamped_c;
+    c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
+    c->put_pixels_nonclamped = put_pixels_nonclamped_c;
+    c->add_pixels_clamped = add_pixels_clamped_c;
+    c->add_pixels8 = add_pixels8_c;
+    c->add_pixels4 = add_pixels4_c;
+    c->sum_abs_dctelem = sum_abs_dctelem_c;
+    c->gmc1 = gmc1_c;
+    c->gmc = ff_gmc_c;
+    c->clear_block = clear_block_c;
+    c->clear_blocks = clear_blocks_c;
+    c->pix_sum = pix_sum_c;
+    c->pix_norm1 = pix_norm1_c;
+    c->fill_block_tab[0] = fill_block16_c;
+    c->fill_block_tab[1] = fill_block8_c;
+    c->scale_block = scale_block_c;
+    /* TODO [0] 16  [1] 8 */
+    c->pix_abs[0][0] = pix_abs16_c;
+    c->pix_abs[0][1] = pix_abs16_x2_c;
+    c->pix_abs[0][2] = pix_abs16_y2_c;
+    c->pix_abs[0][3] = pix_abs16_xy2_c;
+    c->pix_abs[1][0] = pix_abs8_c;
+    c->pix_abs[1][1] = pix_abs8_x2_c;
+    c->pix_abs[1][2] = pix_abs8_y2_c;
+    c->pix_abs[1][3] = pix_abs8_xy2_c;
+#define dspfunc(PFX, IDX, NUM) \
+    c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c;     \
+    c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c;  \
+    c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c;  \
+    c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c
+    dspfunc(put, 0, 16); /* each use fills row IDX with the plain, x2, y2 and xy2 variants for width NUM */
+    dspfunc(put_no_rnd, 0, 16);
+    dspfunc(put, 1, 8);
+    dspfunc(put_no_rnd, 1, 8);
+    dspfunc(put, 2, 4);
+    dspfunc(put, 3, 2);
+    dspfunc(avg, 0, 16);
+    dspfunc(avg_no_rnd, 0, 16);
+    dspfunc(avg, 1, 8);
+    dspfunc(avg_no_rnd, 1, 8);
+    dspfunc(avg, 2, 4);
+    dspfunc(avg, 3, 2);
+#undef dspfunc
+    c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c;
+    c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c;
+    c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c; /* indices 3 and 7 of both tpel tables are never assigned here */
+    c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
+    c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
+    c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
+    c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
+    c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
+    c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
+    c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
+    c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
+    c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
+    c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
+    c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
+    c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
+    c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
+    c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
+    c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
+    c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
+    c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
+#define dspfunc(PFX, IDX, NUM) \
+    c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
+    c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
+    c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
+    c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
+    c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
+    c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
+    c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
+    c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
+    c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
+    c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
+    c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
+    c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
+    c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
+    c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
+    c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
+    c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
+    dspfunc(put_qpel, 0, 16); /* redefined dspfunc: fills the 16 entries mc00..mc33 of row IDX */
+    dspfunc(put_no_rnd_qpel, 0, 16);
+    dspfunc(avg_qpel, 0, 16);
+    /* dspfunc(avg_no_rnd_qpel, 0, 16); */
+    dspfunc(put_qpel, 1, 8);
+    dspfunc(put_no_rnd_qpel, 1, 8);
+    dspfunc(avg_qpel, 1, 8);
+    /* dspfunc(avg_no_rnd_qpel, 1, 8); */
+    dspfunc(put_h264_qpel, 0, 16);
+    dspfunc(put_h264_qpel, 1, 8);
+    dspfunc(put_h264_qpel, 2, 4);
+    dspfunc(put_h264_qpel, 3, 2);
+    dspfunc(avg_h264_qpel, 0, 16);
+    dspfunc(avg_h264_qpel, 1, 8);
+    dspfunc(avg_h264_qpel, 2, 4); /* unlike put_h264_qpel, no row 3 (width 2) is filled for avg */
+#undef dspfunc
+    c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
+    c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
+    c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
+    c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
+    c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
+    c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;
+    c->put_no_rnd_vc1_chroma_pixels_tab[0]= put_no_rnd_vc1_chroma_mc8_c;
+    c->avg_no_rnd_vc1_chroma_pixels_tab[0]= avg_no_rnd_vc1_chroma_mc8_c;
+    c->draw_edges = draw_edges_c;
+#if CONFIG_CAVS_DECODER
+    ff_cavsdsp_init(c,avctx);
+#endif
+#if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
+    ff_mlp_init(c, avctx);
+#endif
+#if CONFIG_VC1_DECODER
+    ff_vc1dsp_init(c,avctx);
+#endif
+#if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER
+    ff_intrax8dsp_init(c,avctx);
+#endif
+#if CONFIG_RV30_DECODER
+    ff_rv30dsp_init(c,avctx);
+#endif
+#if CONFIG_RV40_DECODER
+    ff_rv40dsp_init(c,avctx);
+    c->put_rv40_qpel_pixels_tab[0][15] = put_rv40_qpel16_mc33_c; /* assigned after ff_rv40dsp_init(), so these four win over whatever it stored in slot 15 */
+    c->avg_rv40_qpel_pixels_tab[0][15] = avg_rv40_qpel16_mc33_c;
+    c->put_rv40_qpel_pixels_tab[1][15] = put_rv40_qpel8_mc33_c;
+    c->avg_rv40_qpel_pixels_tab[1][15] = avg_rv40_qpel8_mc33_c;
+#endif
+    c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
+    c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
+    c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
+    c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
+    c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
+    c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
+    c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
+    c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
+#define SET_CMP_FUNC(name) \
+    c->name[0]= name ## 16_c;\
+    c->name[1]= name ## 8x8_c;
+    SET_CMP_FUNC(hadamard8_diff) /* expands to two complete statements ([0] = ...16_c, [1] = ...8x8_c), hence no trailing ';' at the use sites */
+    c->hadamard8_diff[4]= hadamard8_intra16_c;
+    c->hadamard8_diff[5]= hadamard8_intra8x8_c;
+    SET_CMP_FUNC(dct_sad)
+    SET_CMP_FUNC(dct_max)
+#if CONFIG_GPL
+    SET_CMP_FUNC(dct264_sad)
+#endif
+    c->sad[0]= pix_abs16_c;
+    c->sad[1]= pix_abs8_c;
+    c->sse[0]= sse16_c;
+    c->sse[1]= sse8_c;
+    c->sse[2]= sse4_c;
+    SET_CMP_FUNC(quant_psnr)
+    SET_CMP_FUNC(rd)
+    SET_CMP_FUNC(bit)
+    c->vsad[0]= vsad16_c;
+    c->vsad[4]= vsad_intra16_c;
+    c->vsad[5]= vsad_intra8_c;
+    c->vsse[0]= vsse16_c;
+    c->vsse[4]= vsse_intra16_c;
+    c->vsse[5]= vsse_intra8_c;
+    c->nsse[0]= nsse16_c;
+    c->nsse[1]= nsse8_c;
+#if CONFIG_DWT
+    ff_dsputil_init_dwt(c);
+#endif
+    c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
+    c->add_bytes= add_bytes_c;
+    c->add_bytes_l2= add_bytes_l2_c;
+    c->diff_bytes= diff_bytes_c;
+    c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
+    c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
+    c->add_hfyu_left_prediction  = add_hfyu_left_prediction_c;
+    c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
+    c->bswap_buf= bswap_buf;
+#if CONFIG_PNG_DECODER
+    c->add_png_paeth_prediction= ff_add_png_paeth_prediction;
+#endif
+    if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
+        c->h263_h_loop_filter= h263_h_loop_filter_c;
+        c->h263_v_loop_filter= h263_v_loop_filter_c;
+    }
+    if (CONFIG_VP3_DECODER) {
+        c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c;
+        c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c;
+    }
+    if (CONFIG_VP6_DECODER) {
+        c->vp6_filter_diag4= ff_vp6_filter_diag4_c;
+    }
+    c->h261_loop_filter= h261_loop_filter_c;
+    c->try_8x8basis= try_8x8basis_c;
+    c->add_8x8basis= add_8x8basis_c;
+#if CONFIG_VORBIS_DECODER
+    c->vorbis_inverse_coupling = vorbis_inverse_coupling;
+#endif
+#if CONFIG_AC3_DECODER
+    c->ac3_downmix = ff_ac3_downmix_c;
+#endif
+#if CONFIG_LPC
+    c->lpc_compute_autocorr = ff_lpc_compute_autocorr;
+#endif
+#endif /* 0 -- end of first disabled region */
+    c->vector_fmul = vector_fmul_c; /* live code: the five uncommented assignments from here to vector_fmul_scalar are all that gets compiled */
+    c->vector_fmul_reverse = vector_fmul_reverse_c;
+    c->vector_fmul_add = vector_fmul_add_c;
+    c->vector_fmul_window = ff_vector_fmul_window_c;
+    //c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c;
+    //c->vector_clipf = vector_clipf_c;
+    //c->float_to_int16 = ff_float_to_int16_c;
+    //c->float_to_int16_interleave = ff_float_to_int16_interleave_c;
+    //c->scalarproduct_int16 = scalarproduct_int16_c;
+    //c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
+    //c->scalarproduct_float = scalarproduct_float_c;
+    //c->butterflies_float = butterflies_float_c;
+    c->vector_fmul_scalar = vector_fmul_scalar_c; /* the members named in the // lines above are not written by this function */
+#if 0 /* second disabled region: remaining pointers, per-architecture init calls and the idct_permutation table; runs to the closing brace */
+    c->vector_fmul_sv_scalar[0] = vector_fmul_sv_scalar_2_c;
+    c->vector_fmul_sv_scalar[1] = vector_fmul_sv_scalar_4_c;
+    c->sv_fmul_scalar[0] = sv_fmul_scalar_2_c;
+    c->sv_fmul_scalar[1] = sv_fmul_scalar_4_c;
+    c->shrink[0]= ff_img_copy_plane;
+    c->shrink[1]= ff_shrink22;
+    c->shrink[2]= ff_shrink44;
+    c->shrink[3]= ff_shrink88;
+    c->prefetch= just_return;
+    memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab)); /* zeroed so the fallback loop below can detect unset slots */
+    memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
+    if (HAVE_MMX)        dsputil_init_mmx   (c, avctx);
+    if (ARCH_ARM)        dsputil_init_arm   (c, avctx);
+    if (CONFIG_MLIB)     dsputil_init_mlib  (c, avctx);
+    if (HAVE_VIS)        dsputil_init_vis   (c, avctx);
+    if (ARCH_ALPHA)      dsputil_init_alpha (c, avctx);
+    if (ARCH_PPC)        dsputil_init_ppc   (c, avctx);
+    if (HAVE_MMI)        dsputil_init_mmi   (c, avctx);
+    if (ARCH_SH4)        dsputil_init_sh4   (c, avctx);
+    if (ARCH_BFIN)       dsputil_init_bfin  (c, avctx);
+    for(i=0; i<64; i++){ /* i<64 indexes [0][i], i.e. walks past row 0 across the whole table; slots still NULL here get the h264 qpel function */
+        if(!c->put_2tap_qpel_pixels_tab[0][i])
+            c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i];
+        if(!c->avg_2tap_qpel_pixels_tab[0][i])
+            c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
+    }
+    switch(c->idct_permutation_type){ /* builds the 64-entry idct_permutation[] for the selected permutation type */
+    case FF_NO_IDCT_PERM:
+        for(i=0; i<64; i++)
+            c->idct_permutation[i]= i;
+        break;
+    case FF_LIBMPEG2_IDCT_PERM:
+        for(i=0; i<64; i++)
+            c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
+        break;
+    case FF_SIMPLE_IDCT_PERM:
+        for(i=0; i<64; i++)
+            c->idct_permutation[i]= simple_mmx_permutation[i];
+        break;
+    case FF_TRANSPOSE_IDCT_PERM:
+        for(i=0; i<64; i++)
+            c->idct_permutation[i]= ((i&7)<<3) | (i>>3); /* swaps the low and high 3 bits of i */
+        break;
+    case FF_PARTTRANS_IDCT_PERM:
+        for(i=0; i<64; i++)
+            c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
+        break;
+    case FF_SSE2_IDCT_PERM:
+        for(i=0; i<64; i++)
+            c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
+        break;
+    default: /* unknown type: only logs, idct_permutation[] is left unwritten */
+        av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
+    }
+#endif /* 0 -- end of second disabled region */
+}
diff --git a/apps/codecs/libwmapro/dsputil.h b/apps/codecs/libwmapro/dsputil.h
new file mode 100644
index 0000000000..d1816e66ba
--- /dev/null
+++ b/apps/codecs/libwmapro/dsputil.h
@@ -0,0 +1,808 @@
+/*
+ * DSP utils
+ * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/**
+ * @file libavcodec/dsputil.h
+ * DSP utils.
+ * note, many functions in here may use MMX which trashes the FPU state, it is
+ * absolutely necessary to call emms_c() between dsp & float/double code
+ */
+#ifndef AVCODEC_DSPUTIL_H
+#define AVCODEC_DSPUTIL_H
+#include "libavutil/intreadwrite.h"
+#include "avcodec.h"
+//#define DEBUG
+/* dct code: DCTELEM (a short) is the element type of the data/block arrays taken by the functions declared below */
+typedef short DCTELEM;
+void fdct_ifast (DCTELEM *data);
+void fdct_ifast248 (DCTELEM *data);
+void ff_jpeg_fdct_islow (DCTELEM *data);
+void ff_fdct248_islow (DCTELEM *data);
+void j_rev_dct (DCTELEM *data);
+void j_rev_dct4 (DCTELEM *data);
+void j_rev_dct2 (DCTELEM *data);
+void j_rev_dct1 (DCTELEM *data);
+void ff_wmv2_idct_c(DCTELEM *data);
+void ff_fdct_mmx(DCTELEM *block);
+void ff_fdct_mmx2(DCTELEM *block);
+void ff_fdct_sse2(DCTELEM *block);
+void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride);
+void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride);
+void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride);
+void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride);
+void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block); /* argument order is (dst, stride, block), unlike the four ff_h264_idct*_add_c prototypes above */
+void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block);
+void ff_h264_idct_add16_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
+void ff_h264_idct_add16intra_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
+void ff_h264_idct8_add4_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
+void ff_h264_idct_add8_c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]); /* takes uint8_t **dest where its three siblings take uint8_t *dst */
+void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1,
+                             const float *win, float add_bias, int len); /* dsputil_init() stores this in DSPContext.vector_fmul_window */
+void ff_float_to_int16_c(int16_t *dst, const float *src, long len); /* len is long here, int in ff_vector_fmul_window_c above */
+void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, int channels);
+/* encoding scans: four 64-entry byte tables, declared extern here and defined outside this header */
+extern const uint8_t ff_alternate_horizontal_scan[64];
+extern const uint8_t ff_alternate_vertical_scan[64];
+extern const uint8_t ff_zigzag_direct[64];
+extern const uint8_t ff_zigzag248_direct[64];
+/* pixel operations */
+#define MAX_NEG_CROP 1024 /* also sizes ff_cropTbl below */
+/* temporary */
+extern uint32_t ff_squareTbl[512];
+extern uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP]; /* 256 + 2*1024 = 2304 entries; presumably MAX_NEG_CROP of padding on each side of a 256-entry core -- TODO confirm against the definition */
+/* VP3 DSP functions */
+void ff_vp3_idct_c(DCTELEM *block/* align 16*/);
+void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
+void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
+void ff_vp3_v_loop_filter_c(uint8_t *src, int stride, int *bounding_values);
+void ff_vp3_h_loop_filter_c(uint8_t *src, int stride, int *bounding_values);
+/* VP6 DSP functions */
+void ff_vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, int stride,
+                           const int16_t *h_weights, const int16_t *v_weights);
+/* Bink functions */
+void ff_bink_idct_c    (DCTELEM *block);
+void ff_bink_idct_add_c(uint8_t *dest, int linesize, DCTELEM *block);
+void ff_bink_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block);
+/* CAVS functions */
+void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride);
+void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride);
+void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride);
+void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride);
+/* VC1 functions */
+void ff_put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd);
+void ff_avg_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd);
+/* EA functions */
+void ff_ea_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block);
+/* 1/2^n downscaling functions from imgconvert.c; all four share one signature, and the disabled part of dsputil_init() maps them to DSPContext.shrink[0..3] */
+void ff_img_copy_plane(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
+void ff_shrink22(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
+void ff_shrink44(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
+void ff_shrink88(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
+void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
+              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height); /* same parameter list as the DSPContext.gmc function pointer */
+/* minimum alignment rules ;)
+If you notice errors in the align stuff, need more alignment for some ASM code
+for some CPU or need to use a function with less aligned data then send a mail
+to the ffmpeg-devel mailing list, ...
+!warning These alignments might not match reality, (missing attribute((align))
+stuff somewhere possible).
+I (Michael) did not check them, these are just the alignments which I think
+could be reached easily ...
+!future video codecs might need functions with less strict alignment
+*/
+/*
+void get_pixels_c(DCTELEM *block, const uint8_t *pixels, int line_size);
+void diff_pixels_c(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride);
+void put_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size);
+void add_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size);
+void clear_blocks_c(DCTELEM *blocks);
+*/
+/* add and put pixel (decoding): function-pointer types used for the pixel tables in DSPContext */
+// blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16
+//h for op_pixels_func is limited to {width/2, width} but never larger than 16 and never smaller than 4
+typedef void (*op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int h);
+typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h); /* op_pixels_func plus an explicit width w */
+typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride); /* no width/height arguments; src is not const-qualified */
+typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y); /* x, y: presumably the fractional chroma offsets -- TODO confirm */
+typedef void (*op_fill_func)(uint8_t *block/*align width (8 or 16)*/, uint8_t value, int line_size, int h); /* takes a single byte value instead of a source pointer */
+#define DEF_OLD_QPEL(name)\
+void ff_put_        ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\
+void ff_put_no_rnd_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\
+void ff_avg_        ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);
+DEF_OLD_QPEL(qpel16_mc11_old_c) /* each use declares three prototypes: ff_put_<name>, ff_put_no_rnd_<name> and ff_avg_<name>, all with the qpel_mc_func parameter list */
+DEF_OLD_QPEL(qpel16_mc31_old_c)
+DEF_OLD_QPEL(qpel16_mc12_old_c)
+DEF_OLD_QPEL(qpel16_mc32_old_c)
+DEF_OLD_QPEL(qpel16_mc13_old_c)
+DEF_OLD_QPEL(qpel16_mc33_old_c)
+DEF_OLD_QPEL(qpel8_mc11_old_c) /* same six positions again for the qpel8 names */
+DEF_OLD_QPEL(qpel8_mc31_old_c)
+DEF_OLD_QPEL(qpel8_mc12_old_c)
+DEF_OLD_QPEL(qpel8_mc32_old_c)
+DEF_OLD_QPEL(qpel8_mc13_old_c)
+DEF_OLD_QPEL(qpel8_mc33_old_c)
+#define CALL_2X_PIXELS(a, b, n)\
+static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    b(block  , pixels  , line_size, h);\
+    b(block+n, pixels+n, line_size, h);\
+} /* defines static function a() with the op_pixels_func parameter list: it calls b at offset 0 and again at byte offset n. NOTE(review): n is pasted unparenthesized into block+n, so pass a plain constant */
+/* motion estimation */
+// h is limited to {width/2, width, 2*width} but never larger than 16 and never smaller than 2
+// although currently h<4 is not used as functions with width <8 are neither used nor implemented
+typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/; /* s is passed as an untyped void * context; returns an int score */
+/**
+ * Scantable: a pointer to a scan order plus 64-entry byte tables (ff_init_scantable() takes a ScanTable * and a source scan, presumably to fill it -- confirm in dsputil.c).
+ */
+typedef struct ScanTable{
+    const uint8_t *scantable; /* pointer only; the struct holds no copy of the pointed-to table */
+    uint8_t permutated[64]; /* presumably scantable[] run through an IDCT permutation -- TODO confirm */
+    uint8_t raster_end[64]; /* NOTE(review): meaning not derivable from this header; see ff_init_scantable() */
+#if ARCH_PPC /* the member below exists only when ARCH_PPC is nonzero */
+                /** Used by dct_quantize_altivec to find last-non-zero */
+    DECLARE_ALIGNED(16, uint8_t, inverse)[64];
+#endif
+} ScanTable;
+void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable); /* NOTE(review): first parameter is unnamed; presumably the IDCT permutation table -- confirm against the definition */
+void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize,
+                         int block_w, int block_h,
+                         int src_x, int src_y, int w, int h); /* buf and src are both non-const; w and h are separate from block_w/block_h (presumably the picture dimensions -- TODO confirm) */
+/**
+ * DSPContext.
+ */
+typedef struct DSPContext {
+    /* pixel ops : interface with DCT */
+    void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size);
+    void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride);
+    void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
+    void (*put_signed_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
+    void (*put_pixels_nonclamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
+    void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
+    void (*add_pixels8)(uint8_t *pixels, DCTELEM *block, int line_size);
+    void (*add_pixels4)(uint8_t *pixels, DCTELEM *block, int line_size);
+    int (*sum_abs_dctelem)(DCTELEM *block/*align 16*/);
+    /**
+     * translational global motion compensation.
+     */
+    void (*gmc1)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder);
+    /**
+     * global motion compensation.
+     */
+    void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy,
+                    int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
+    void (*clear_block)(DCTELEM *block/*align 16*/);
+    void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
+    int (*pix_sum)(uint8_t * pix, int line_size);
+    int (*pix_norm1)(uint8_t * pix, int line_size);
+// 16x16 8x8 4x4 2x2 16x8 8x4 4x2 8x16 4x8 2x4
+    me_cmp_func sad[6]; /* identical to pix_absAxA except additional void * */
+    me_cmp_func sse[6];
+    me_cmp_func hadamard8_diff[6];
+    me_cmp_func dct_sad[6];
+    me_cmp_func quant_psnr[6];
+    me_cmp_func bit[6];
+    me_cmp_func rd[6];
+    me_cmp_func vsad[6];
+    me_cmp_func vsse[6];
+    me_cmp_func nsse[6];
+    me_cmp_func w53[6];
+    me_cmp_func w97[6];
+    me_cmp_func dct_max[6];
+    me_cmp_func dct264_sad[6];
+    me_cmp_func me_pre_cmp[6];
+    me_cmp_func me_cmp[6];
+    me_cmp_func me_sub_cmp[6];
+    me_cmp_func mb_cmp[6];
+    me_cmp_func ildct_cmp[6]; //only width 16 used
+    me_cmp_func frame_skip_cmp[6]; //only width 8 used
+    int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2,
+                             int size);
+    /**
+     * Halfpel motion compensation with rounding (a+b+1)>>1.
+     * this is an array[4][4] of motion compensation functions for 4
+     * horizontal blocksizes (16,8,4,2) and the 4 halfpel positions<br>
+     * *pixels_tab[ 0->16xH 1->8xH 2->4xH 3->2xH ][ xhalfpel + 2*yhalfpel ]
+     * @param block destination where the result is stored
+     * @param pixels source
+     * @param line_size number of bytes in a horizontal line of block
+     * @param h height
+     */
+    op_pixels_func put_pixels_tab[4][4];
+    /**
+     * Halfpel motion compensation with rounding (a+b+1)>>1.
+     * This is an array[4][4] of motion compensation functions for 4
+     * horizontal blocksizes (16,8,4,2) and the 4 halfpel positions<br>
+     * *pixels_tab[ 0->16xH 1->8xH 2->4xH 3->2xH ][ xhalfpel + 2*yhalfpel ]
+     * @param block destination into which the result is averaged (a+b+1)>>1
+     * @param pixels source
+     * @param line_size number of bytes in a horizontal line of block
+     * @param h height
+     */
+    op_pixels_func avg_pixels_tab[4][4];
+    /**
+     * Halfpel motion compensation with no rounding (a+b)>>1.
+     * this is an array[2][4] of motion compensation functions for 2
+     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
+     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
+     * @param block destination where the result is stored
+     * @param pixels source
+     * @param line_size number of bytes in a horizontal line of block
+     * @param h height
+     */
+    op_pixels_func put_no_rnd_pixels_tab[4][4];
+    /**
+     * Halfpel motion compensation with no rounding (a+b)>>1.
+     * this is an array[2][4] of motion compensation functions for 2
+     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
+     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
+     * @param block destination into which the result is averaged (a+b)>>1
+     * @param pixels source
+     * @param line_size number of bytes in a horizontal line of block
+     * @param h height
+     */
+    op_pixels_func avg_no_rnd_pixels_tab[4][4];
+    void (*put_no_rnd_pixels_l2[2])(uint8_t *block/*align width (8 or 16)*/, const uint8_t *a/*align 1*/, const uint8_t *b/*align 1*/, int line_size, int h);
+    /**
+     * Thirdpel motion compensation with rounding (a+b+1)>>1.
+     * this is an array[11] of motion compensation functions for the 9 thirdpel
+     * positions<br>
+     * *pixels_tab[ xthirdpel + 4*ythirdpel ]
+     * @param block destination where the result is stored
+     * @param pixels source
+     * @param line_size number of bytes in a horizontal line of block
+     * @param h height
+     */
+    tpel_mc_func put_tpel_pixels_tab[11]; //FIXME individual func ptr per width?
+    tpel_mc_func avg_tpel_pixels_tab[11]; //FIXME individual func ptr per width?
+    qpel_mc_func put_qpel_pixels_tab[2][16];
+    qpel_mc_func avg_qpel_pixels_tab[2][16];
+    qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
+    qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16];
+    qpel_mc_func put_mspel_pixels_tab[8];
+    /**
+     * h264 Chroma MC
+     */
+    h264_chroma_mc_func put_h264_chroma_pixels_tab[3];
+    h264_chroma_mc_func avg_h264_chroma_pixels_tab[3];
+    /* This is really one func used in VC-1 decoding */
+    h264_chroma_mc_func put_no_rnd_vc1_chroma_pixels_tab[3];
+    h264_chroma_mc_func avg_no_rnd_vc1_chroma_pixels_tab[3];
+    qpel_mc_func put_h264_qpel_pixels_tab[4][16];
+    qpel_mc_func avg_h264_qpel_pixels_tab[4][16];
+    qpel_mc_func put_2tap_qpel_pixels_tab[4][16];
+    qpel_mc_func avg_2tap_qpel_pixels_tab[4][16];
+    /* AVS specific */
+    qpel_mc_func put_cavs_qpel_pixels_tab[2][16];
+    qpel_mc_func avg_cavs_qpel_pixels_tab[2][16];
+    void (*cavs_filter_lv)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
+    void (*cavs_filter_lh)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
+    void (*cavs_filter_cv)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
+    void (*cavs_filter_ch)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
+    void (*cavs_idct8_add)(uint8_t *dst, DCTELEM *block, int stride);
+    me_cmp_func pix_abs[2][4];
+    /* huffyuv specific */
+    void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w);
+    void (*add_bytes_l2)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 16*/, int w);
+    void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w);
+    /**
+     * subtract huffyuv's variant of median prediction
+     * note, this might read from src1[-1], src2[-1]
+     */
+    void (*sub_hfyu_median_prediction)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top);
+    void (*add_hfyu_median_prediction)(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top);
+    int  (*add_hfyu_left_prediction)(uint8_t *dst, const uint8_t *src, int w, int left);
+    void (*add_hfyu_left_prediction_bgr32)(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha);
+    /* this might write to dst[w] */
+    void (*add_png_paeth_prediction)(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp);
+    void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w);
+    void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale);
+    void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale);
+    void (*h261_loop_filter)(uint8_t *src, int stride);
+    void (*x8_v_loop_filter)(uint8_t *src, int stride, int qscale);
+    void (*x8_h_loop_filter)(uint8_t *src, int stride, int qscale);
+    void (*vp3_v_loop_filter)(uint8_t *src, int stride, int *bounding_values);
+    void (*vp3_h_loop_filter)(uint8_t *src, int stride, int *bounding_values);
+    void (*vp6_filter_diag4)(uint8_t *dst, uint8_t *src, int stride,
+                             const int16_t *h_weights,const int16_t *v_weights);
+    /* assume len is a multiple of 4, and arrays are 16-byte aligned */
+    void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize);
+    void (*ac3_downmix)(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len);
+    /* no alignment needed */
+    void (*lpc_compute_autocorr)(const int32_t *data, int len, int lag, double *autoc);
+    /* assume len is a multiple of 8, and arrays are 16-byte aligned */
+    void (*vector_fmul)(float *dst, const float *src, int len);
+    void (*vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len);
+    /* assume len is a multiple of 8, and src arrays are 16-byte aligned */
+    void (*vector_fmul_add)(float *dst, const float *src0, const float *src1, const float *src2, int len);
+    /* assume len is a multiple of 4, and arrays are 16-byte aligned */
+    void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len);
+    /* assume len is a multiple of 8, and arrays are 16-byte aligned */
+    void (*int32_to_float_fmul_scalar)(float *dst, const int *src, float mul, int len);
+    void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */);
+    /**
+     * Multiply a vector of floats by a scalar float.  Source and
+     * destination vectors must overlap exactly or not at all.
+     * @param dst result vector, 16-byte aligned
+     * @param src input vector, 16-byte aligned
+     * @param mul scalar value
+     * @param len length of vector, multiple of 4
+     */
+    void (*vector_fmul_scalar)(float *dst, const float *src, float mul,
+                               int len);
+    /**
+     * Multiply a vector of floats by concatenated short vectors of
+     * floats and by a scalar float.  Source and destination vectors
+     * must overlap exactly or not at all.
+     * [0]: short vectors of length 2, 8-byte aligned
+     * [1]: short vectors of length 4, 16-byte aligned
+     * @param dst output vector, 16-byte aligned
+     * @param src input vector, 16-byte aligned
+     * @param sv  array of pointers to short vectors
+     * @param mul scalar value
+     * @param len number of elements in src and dst, multiple of 4
+     */
+    void (*vector_fmul_sv_scalar[2])(float *dst, const float *src,
+                                     const float **sv, float mul, int len);
+    /**
+     * Multiply short vectors of floats by a scalar float, store
+     * concatenated result.
+     * [0]: short vectors of length 2, 8-byte aligned
+     * [1]: short vectors of length 4, 16-byte aligned
+     * @param dst output vector, 16-byte aligned
+     * @param sv  array of pointers to short vectors
+     * @param mul scalar value
+     * @param len number of output elements, multiple of 4
+     */
+    void (*sv_fmul_scalar[2])(float *dst, const float **sv,
+                              float mul, int len);
+    /**
+     * Calculate the scalar product of two vectors of floats.
+     * @param v1  first vector, 16-byte aligned
+     * @param v2  second vector, 16-byte aligned
+     * @param len length of vectors, multiple of 4
+     */
+    float (*scalarproduct_float)(const float *v1, const float *v2, int len);
+    /**
+     * Calculate the sum and difference of two vectors of floats.
+     * @param v1  first input vector, sum output, 16-byte aligned
+     * @param v2  second input vector, difference output, 16-byte aligned
+     * @param len length of vectors, multiple of 4
+     */
+    void (*butterflies_float)(float *restrict v1, float *restrict v2, int len);
+    /* C version: convert floats from the range [384.0,386.0] to ints in [-32768,32767]
+     * simd versions: convert floats from [-32768.0,32767.0] without rescaling and arrays are 16byte aligned */
+    void (*float_to_int16)(int16_t *dst, const float *src, long len);
+    void (*float_to_int16_interleave)(int16_t *dst, const float **src, long len, int channels);
+    /* (I)DCT */
+    void (*fdct)(DCTELEM *block/* align 16*/);
+    void (*fdct248)(DCTELEM *block/* align 16*/);
+    /* IDCT really*/
+    void (*idct)(DCTELEM *block/* align 16*/);
+    /**
+     * block -> idct -> clip to unsigned 8 bit -> dest.
+     * (-1392, 0, 0, ...) -> idct -> (-174, -174, ...) -> put -> (0, 0, ...)
+     * @param line_size size in bytes of a horizontal line of dest
+     */
+    void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
+    /**
+     * block -> idct -> add dest -> clip to unsigned 8 bit -> dest.
+     * @param line_size size in bytes of a horizontal line of dest
+     */
+    void (*idct_add)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
+    /**
+     * idct input permutation.
+     * Several optimized IDCTs need a permuted input (relative to the normal order of the reference
+     * IDCT).
+     * This permutation must be performed before the idct_put/add. Note that normally this can be merged
+     * with the zigzag/alternate scan.<br>
+     * An example to avoid confusion:
+     * - (->decode coeffs -> zigzag reorder -> dequant -> reference idct ->...)
+     * - (x -> reference dct -> reference idct -> x)
+     * - (x -> reference dct -> simple_mmx_perm = idct_permutation -> simple_idct_mmx -> x)
+     * - (->decode coeffs -> zigzag reorder -> simple_mmx_perm -> dequant -> simple_idct_mmx ->...)
+     */
+    uint8_t idct_permutation[64];
+    int idct_permutation_type;
+#define FF_NO_IDCT_PERM 1
+#define FF_LIBMPEG2_IDCT_PERM 2
+#define FF_SIMPLE_IDCT_PERM 3
+#define FF_TRANSPOSE_IDCT_PERM 4
+#define FF_PARTTRANS_IDCT_PERM 5
+#define FF_SSE2_IDCT_PERM 6
+    int (*try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale);
+    void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale);
+#define BASIS_SHIFT 16
+#define RECON_SHIFT 6
+    void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w);
+#define EDGE_WIDTH 16
+    void (*prefetch)(void *mem, int stride, int h);
+    void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
+    /* mlp/truehd functions */
+    void (*mlp_filter_channel)(int32_t *state, const int32_t *coeff,
+                               int firorder, int iirorder,
+                               unsigned int filter_shift, int32_t mask, int blocksize,
+                               int32_t *sample_buffer);
+    /* vc1 functions */
+    void (*vc1_inv_trans_8x8)(DCTELEM *b);
+    void (*vc1_inv_trans_8x4)(uint8_t *dest, int line_size, DCTELEM *block);
+    void (*vc1_inv_trans_4x8)(uint8_t *dest, int line_size, DCTELEM *block);
+    void (*vc1_inv_trans_4x4)(uint8_t *dest, int line_size, DCTELEM *block);
+    void (*vc1_inv_trans_8x8_dc)(uint8_t *dest, int line_size, DCTELEM *block);
+    void (*vc1_inv_trans_8x4_dc)(uint8_t *dest, int line_size, DCTELEM *block);
+    void (*vc1_inv_trans_4x8_dc)(uint8_t *dest, int line_size, DCTELEM *block);
+    void (*vc1_inv_trans_4x4_dc)(uint8_t *dest, int line_size, DCTELEM *block);
+    void (*vc1_v_overlap)(uint8_t* src, int stride);
+    void (*vc1_h_overlap)(uint8_t* src, int stride);
+    void (*vc1_v_loop_filter4)(uint8_t *src, int stride, int pq);
+    void (*vc1_h_loop_filter4)(uint8_t *src, int stride, int pq);
+    void (*vc1_v_loop_filter8)(uint8_t *src, int stride, int pq);
+    void (*vc1_h_loop_filter8)(uint8_t *src, int stride, int pq);
+    void (*vc1_v_loop_filter16)(uint8_t *src, int stride, int pq);
+    void (*vc1_h_loop_filter16)(uint8_t *src, int stride, int pq);
+    /* put 8x8 block with bicubic interpolation and quarterpel precision
+     * last argument is actually round value instead of height
+     */
+    op_pixels_func put_vc1_mspel_pixels_tab[16];
+    op_pixels_func avg_vc1_mspel_pixels_tab[16];
+    /* intrax8 functions */
+    void (*x8_spatial_compensation[12])(uint8_t *src , uint8_t *dst, int linesize);
+    void (*x8_setup_spatial_compensation)(uint8_t *src, uint8_t *dst, int linesize,
+           int * range, int * sum,  int edges);
+    /**
+     * Calculate scalar product of two vectors.
+     * @param len length of vectors, should be multiple of 16
+     * @param shift number of bits to discard from product
+     */
+    int32_t (*scalarproduct_int16)(int16_t *v1, int16_t *v2/*align 16*/, int len, int shift);
+    /* ape functions */
+    /**
+     * Calculate scalar product of v1 and v2,
+     * and v1[i] += v3[i] * mul
+     * @param len length of vectors, should be multiple of 16
+     */
+    int32_t (*scalarproduct_and_madd_int16)(int16_t *v1/*align 16*/, int16_t *v2, int16_t *v3, int len, int mul);
+    /* rv30 functions */
+    qpel_mc_func put_rv30_tpel_pixels_tab[4][16];
+    qpel_mc_func avg_rv30_tpel_pixels_tab[4][16];
+    /* rv40 functions */
+    qpel_mc_func put_rv40_qpel_pixels_tab[4][16];
+    qpel_mc_func avg_rv40_qpel_pixels_tab[4][16];
+    h264_chroma_mc_func put_rv40_chroma_pixels_tab[3];
+    h264_chroma_mc_func avg_rv40_chroma_pixels_tab[3];
+    /* bink functions */
+    op_fill_func fill_block_tab[2];
+    void (*scale_block)(const uint8_t src[64]/*align 8*/, uint8_t *dst/*align 8*/, int linesize);
+} DSPContext;
+void dsputil_static_init(void);
+void dsputil_init(DSPContext* p, AVCodecContext *avctx);
+int ff_check_alignment(void);
+/**
+ * Permute block according to permutation.
+ * @param last last non zero element in scantable order
+ */
+void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last);
+void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type);
+#define         BYTE_VEC32(c)   ((c)*0x01010101UL) /* replicate the byte value c (0..255) into all four bytes of a 32-bit word */
+static inline uint32_t rnd_avg32(uint32_t a, uint32_t b) /* average the four packed bytes of a and b, rounding up: (a+b+1)>>1 per byte */
+{
+    return (a | b) - (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1); /* each byte's low bit is cleared first so the shift cannot leak into the neighbouring byte */
+}
+static inline uint32_t no_rnd_avg32(uint32_t a, uint32_t b) /* average the four packed bytes of a and b, rounding down: (a+b)>>1 per byte */
+{
+    return (a & b) + (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1); /* each byte's low bit is cleared first so the shift cannot leak into the neighbouring byte */
+}
+/* Return the penalty factor for the comparison function selected by the low 8 bits of type. */
+static inline int get_penalty_factor(int lambda, int lambda2, int type)
+{
+    const int cmp = type & 0xFF;
+
+    switch (cmp) {
+    case FF_CMP_DCT:
+        return (3 * lambda) >> (FF_LAMBDA_SHIFT + 1);
+    case FF_CMP_W53:
+        return (4 * lambda) >> FF_LAMBDA_SHIFT;
+    case FF_CMP_W97:
+    case FF_CMP_SATD:
+    case FF_CMP_DCT264:
+        return (2 * lambda) >> FF_LAMBDA_SHIFT;
+    case FF_CMP_RD: case FF_CMP_PSNR: case FF_CMP_SSE: case FF_CMP_NSSE:
+        return lambda2 >> FF_LAMBDA_SHIFT;   /* the only cases driven by lambda2 */
+    case FF_CMP_BIT:
+        return 1;
+    case FF_CMP_SAD:
+    default:                                 /* unknown ids are treated like SAD */
+        return lambda >> FF_LAMBDA_SHIFT;
+    }
+}
+/**
+ * Empty mmx state.
+ * this must be called between any dsp function and float/double code.
+ * for example sin(); dsp->idct_put(); emms_c(); cos()
+ */
+#define emms_c()
+/* should be defined by architectures supporting
+   one or more MultiMedia extension */
+int mm_support(void);
+extern int mm_flags;
+void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx);
+void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx);
+void dsputil_init_bfin(DSPContext* c, AVCodecContext *avctx);
+void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx);
+void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx);
+void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx);
+void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx);
+void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx);
+void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx);
+void ff_dsputil_init_dwt(DSPContext *c);
+void ff_cavsdsp_init(DSPContext* c, AVCodecContext *avctx);
+void ff_rv30dsp_init(DSPContext* c, AVCodecContext *avctx);
+void ff_rv40dsp_init(DSPContext* c, AVCodecContext *avctx);
+void ff_vc1dsp_init(DSPContext* c, AVCodecContext *avctx);
+void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx);
+void ff_mlp_init(DSPContext* c, AVCodecContext *avctx);
+void ff_mlp_init_x86(DSPContext* c, AVCodecContext *avctx);
+#if HAVE_MMX
+#undef emms_c
+static inline void emms(void)
+{
+    __asm__ volatile ("emms;":::"memory");
+}
+#define emms_c() \
+{\
+    if (mm_flags & FF_MM_MMX)\
+        emms();\
+}
+#elif ARCH_ARM
+#if HAVE_NEON
+#   define STRIDE_ALIGN 16
+#endif
+#elif ARCH_PPC
+#define STRIDE_ALIGN 16
+#elif HAVE_MMI
+#define STRIDE_ALIGN 16
+#else
+#define mm_flags 0
+#define mm_support() 0
+#endif
+#ifndef STRIDE_ALIGN
+#   define STRIDE_ALIGN 8
+#endif
+#define LOCAL_ALIGNED(a, t, v, s, ...) /* declare local v with alignment a, carved from a byte array */ \
+    uint8_t la_##v[sizeof(t s __VA_ARGS__) + (a)]; /* backing storage: the object size plus a bytes of slack */ \
+    t (*v) __VA_ARGS__ = (void *)FFALIGN((uintptr_t)la_##v, a) /* v = storage address passed through FFALIGN (presumably rounds up to a multiple of a) */
+#if HAVE_LOCAL_ALIGNED_8
+#   define LOCAL_ALIGNED_8(t, v, s, ...) DECLARE_ALIGNED(8, t, v) s __VA_ARGS__
+#else
+#   define LOCAL_ALIGNED_8(t, v, s, ...) LOCAL_ALIGNED(8, t, v, s, __VA_ARGS__)
+#endif
+#if HAVE_LOCAL_ALIGNED_16
+#   define LOCAL_ALIGNED_16(t, v, s, ...) DECLARE_ALIGNED(16, t, v) s __VA_ARGS__
+#else
+#   define LOCAL_ALIGNED_16(t, v, s, ...) LOCAL_ALIGNED(16, t, v, s, __VA_ARGS__)
+#endif
+/* PSNR */
+void get_psnr(uint8_t *orig_image[3], uint8_t *coded_image[3],
+              int orig_linesize[3], int coded_linesize,
+              AVCodecContext *avctx);
+#define WRAPPER8_16(name8, name16) /* define name16() as the sum of name8() at byte offset 0 and at byte offset 8 */\
+static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
+    return name8(s, dst           , src           , stride, h)\
+          +name8(s, dst+8         , src+8         , stride, h);\
+}
+#define WRAPPER8_16_SQ(name8, name16) /* define name16() from name8() calls that each use a fixed height of 8 */\
+static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
+    int score=0;\
+    score +=name8(s, dst           , src           , stride, 8); /* first 8 rows, offset 0 */\
+    score +=name8(s, dst+8         , src+8         , stride, 8); /* first 8 rows, offset 8 */\
+    if(h==16){ /* h is only tested against 16; any other value scores the first 8 rows only */\
+        dst += 8*stride;\
+        src += 8*stride;\
+        score +=name8(s, dst           , src           , stride, 8); /* next 8 rows, offset 0 */\
+        score +=name8(s, dst+8         , src+8         , stride, 8); /* next 8 rows, offset 8 */\
+    }\
+    return score;\
+}
+/* Copy h rows from src to dst, one AV_RN16/AV_WN16 transfer per row.
+ * After each row dst advances by dstStride and src by srcStride. */
+static inline void copy_block2(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
+{
+    for (; h > 0; h--) {
+        AV_WN16(dst, AV_RN16(src));
+        dst += dstStride;
+        src += srcStride;
+    }
+}
+/* Copy h rows from src to dst, one AV_RN32/AV_WN32 transfer per row.
+ * After each row dst advances by dstStride and src by srcStride. */
+static inline void copy_block4(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
+{
+    for (; h > 0; h--) {
+        AV_WN32(dst, AV_RN32(src));
+        dst += dstStride;
+        src += srcStride;
+    }
+}
+/* Copy h rows from src to dst, two AV_RN32/AV_WN32 transfers (offsets 0 and 4) per row.
+ * After each row dst advances by dstStride and src by srcStride. */
+static inline void copy_block8(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
+{
+    for (; h > 0; h--) {
+        AV_WN32(dst,     AV_RN32(src));
+        AV_WN32(dst + 4, AV_RN32(src + 4));
+        dst += dstStride;
+        src += srcStride;
+    }
+}
+/* Copy h rows from src to dst: two AV_RN32/AV_WN32 transfers plus the byte at offset 8.
+ * After each row dst advances by dstStride and src by srcStride. */
+static inline void copy_block9(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
+{
+    for (; h > 0; h--) {
+        AV_WN32(dst,     AV_RN32(src));
+        AV_WN32(dst + 4, AV_RN32(src + 4));
+        dst[8] = src[8];
+        dst += dstStride;
+        src += srcStride;
+    }
+}
+/* Copy h rows from src to dst, four AV_RN32/AV_WN32 transfers (offsets 0, 4, 8, 12) per row.
+ * After each row dst advances by dstStride and src by srcStride. */
+static inline void copy_block16(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
+{
+    for (; h > 0; h--) {
+        AV_WN32(dst,      AV_RN32(src));
+        AV_WN32(dst + 4,  AV_RN32(src + 4));
+        AV_WN32(dst + 8,  AV_RN32(src + 8));
+        AV_WN32(dst + 12, AV_RN32(src + 12));
+        dst += dstStride;
+        src += srcStride;
+    }
+}
+/* Copy h rows from src to dst: four AV_RN32/AV_WN32 transfers plus the byte at offset 16.
+ * After each row dst advances by dstStride and src by srcStride. */
+static inline void copy_block17(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
+{
+    for (; h > 0; h--) {
+        AV_WN32(dst,      AV_RN32(src));
+        AV_WN32(dst + 4,  AV_RN32(src + 4));
+        AV_WN32(dst + 8,  AV_RN32(src + 8));
+        AV_WN32(dst + 12, AV_RN32(src + 12));
+        dst[16] = src[16];
+        dst += dstStride;
+        src += srcStride;
+    }
+}
+#endif /* AVCODEC_DSPUTIL_H */
diff --git a/apps/codecs/libwmapro/fft.c b/apps/codecs/libwmapro/fft.c
new file mode 100644
index 0000000000..80dd35b2b6
--- /dev/null
+++ b/apps/codecs/libwmapro/fft.c
@@ -0,0 +1,385 @@
+/*
+ * FFT/IFFT transforms
+ * Copyright (c) 2008 Loren Merritt
+ * Copyright (c) 2002 Fabrice Bellard
+ * Partly based on libdjbfft by D. J. Bernstein
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/**
+ * @file libavcodec/fft.c
+ * FFT/IFFT transforms.
+ */
+#include "dsputil.h"
+#include "fft.h"
+#ifndef M_PI
+#define M_PI           3.14159265358979323846  /* pi */
+#endif
+#ifndef M_SQRT1_2
+#define M_SQRT1_2      0.70710678118654752440  /* 1/sqrt(2) */
+#endif
+#ifndef M_SQRT2
+#define M_SQRT2        1.41421356237309504880  /* sqrt(2) */
+#endif
+/* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */
+DECLARE_ALIGNED_16(FFTSample, ff_cos_16[8]);
+DECLARE_ALIGNED_16(FFTSample, ff_cos_32[16]);
+DECLARE_ALIGNED_16(FFTSample, ff_cos_64[32]);
+DECLARE_ALIGNED_16(FFTSample, ff_cos_128[64]);
+DECLARE_ALIGNED_16(FFTSample, ff_cos_256[128]);
+DECLARE_ALIGNED_16(FFTSample, ff_cos_512[256]);
+DECLARE_ALIGNED_16(FFTSample, ff_cos_1024[512]);
+DECLARE_ALIGNED_16(FFTSample, ff_cos_2048[1024]);
+DECLARE_ALIGNED_16(FFTSample, ff_cos_4096[2048]);
+DECLARE_ALIGNED_16(FFTSample, ff_cos_8192[4096]);
+DECLARE_ALIGNED_16(FFTSample, ff_cos_16384[8192]);
+DECLARE_ALIGNED_16(FFTSample, ff_cos_32768[16384]);
+DECLARE_ALIGNED_16(FFTSample, ff_cos_65536[32768]);
+FFTSample *ff_cos_tabs[] = { /* entry k is the cosine table for size 1 << (k + 4); ff_fft_init() indexes it with j-4 */
+    ff_cos_16, ff_cos_32, ff_cos_64, ff_cos_128, ff_cos_256, ff_cos_512, ff_cos_1024,
+    ff_cos_2048, ff_cos_4096, ff_cos_8192, ff_cos_16384, ff_cos_32768, ff_cos_65536,
+};
+/* Recursive index map used to fill revtab: bit n/2 clear -> 2 * f(i, n/2), else 4 * f(i, n/4) +/- 1. */
+static int split_radix_permutation(int i, int n, int inverse)
+{
+    const int half = n >> 1, quarter = n >> 2;
+    if (n <= 2)
+        return i & 1;
+    if ((i & half) == 0)
+        return 2 * split_radix_permutation(i, half, inverse);
+    return 4 * split_radix_permutation(i, quarter, inverse) + (inverse == !(i & quarter) ? 1 : -1);
+}
+/* Set up s for a complex FFT of 1 << nbits points (2 <= nbits <= 16); inverse != 0
+ * selects the inverse transform.  Returns 0 on success, or -1 with every table
+ * released when nbits is out of range or an allocation fails. */
+av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
+{
+    int i, j, m, n;
+    float alpha, c1, s1, s2;
+    int split_radix = 1;
+    int av_unused has_vectors;
+    s->revtab = NULL; /* clear all table pointers up front: the fail path frees each of them */
+    s->exptab = s->exptab1 = s->tmp_buf = NULL;
+    if (nbits < 2 || nbits > 16)
+        goto fail;
+    s->nbits = nbits;
+    n = 1 << nbits;
+    s->exptab  = av_malloc((n / 2) * sizeof(FFTComplex));
+    if (!s->exptab)
+        goto fail;
+    s->revtab = av_malloc(n * sizeof(uint16_t));
+    if (!s->revtab)
+        goto fail;
+    s->inverse = inverse;
+    s2 = inverse ? 1.0 : -1.0;
+    s->fft_permute = ff_fft_permute_c;
+    s->fft_calc    = ff_fft_calc_c;
+    s->imdct_calc  = ff_imdct_calc_c;
+    s->imdct_half  = ff_imdct_half_c;
+#if HAVE_MMX && HAVE_YASM
+    has_vectors = mm_support();
+    if (has_vectors & FF_MM_SSE && HAVE_SSE) {
+        /* SSE for P3/P4/K8 */
+        s->imdct_calc  = ff_imdct_calc_sse;
+        s->imdct_half  = ff_imdct_half_sse;
+        s->fft_permute = ff_fft_permute_sse;
+        s->fft_calc    = ff_fft_calc_sse;
+    } else if (has_vectors & FF_MM_3DNOWEXT && HAVE_AMD3DNOWEXT) {
+        /* 3DNowEx for K7 */
+        s->imdct_calc = ff_imdct_calc_3dn2;
+        s->imdct_half = ff_imdct_half_3dn2;
+        s->fft_calc   = ff_fft_calc_3dn2;
+    } else if (has_vectors & FF_MM_3DNOW && HAVE_AMD3DNOW) {
+        /* 3DNow! for K6-2/3 */
+        s->imdct_calc = ff_imdct_calc_3dn;
+        s->imdct_half = ff_imdct_half_3dn;
+        s->fft_calc   = ff_fft_calc_3dn;
+    }
+#elif HAVE_ALTIVEC
+    has_vectors = mm_support();
+    if (has_vectors & FF_MM_ALTIVEC) {
+        s->fft_calc = ff_fft_calc_altivec;
+        split_radix = 0;
+    }
+#endif
+    if (split_radix) {
+        for(j=4; j<=nbits; j++) {
+            int m = 1<<j;
+            double freq = 2*M_PI/m;
+            FFTSample *tab = ff_cos_tabs[j-4];
+            for(i=0; i<=m/4; i++)
+                tab[i] = cos(i*freq);
+            for(i=1; i<m/4; i++)
+                tab[m/2-i] = tab[i];
+        }
+        for(i=0; i<n; i++)
+            s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = i;
+        s->tmp_buf = av_malloc(n * sizeof(FFTComplex));
+        if (!s->tmp_buf) /* otherwise ff_fft_permute_c() would silently take its in-place swap path */
+            goto fail;
+    } else {
+        int nblocks, np2, l;
+        FFTComplex *q;
+        for(i=0; i<(n/2); i++) {
+            alpha = 2 * M_PI * (float)i / (float)n;
+            c1 = cos(alpha);
+            s1 = sin(alpha) * s2;
+            s->exptab[i].re = c1;
+            s->exptab[i].im = s1;
+        }
+        nblocks = n >> 3;
+        np2 = n >> 1;
+        s->exptab1 = av_malloc(n * 2 * sizeof(FFTComplex));
+        if (!s->exptab1)
+            goto fail;
+        q = s->exptab1;
+        do {
+            for(l = 0; l < np2; l += 2 * nblocks) {
+                *q++ = s->exptab[l];
+                *q++ = s->exptab[l + nblocks];
+                q->re = -s->exptab[l].im;
+                q->im = s->exptab[l].re;
+                q++;
+                q->re = -s->exptab[l + nblocks].im;
+                q->im = s->exptab[l + nblocks].re;
+                q++;
+            }
+            nblocks = nblocks >> 1;
+        } while (nblocks != 0);
+        av_freep(&s->exptab);
+        /* compute bit reverse table */
+        for(i=0;i<n;i++) {
+            m=0;
+            for(j=0;j<nbits;j++)
+                m |= ((i >> j) & 1) << (nbits-j-1);
+            s->revtab[i]=m;
+        }
+    }
+    return 0;
+ fail:
+    ff_fft_end(s); /* frees revtab, exptab, exptab1 and tmp_buf */
+    return -1;
+}
+/* Reorder the 1 << nbits entries of z according to s->revtab. */
+void ff_fft_permute_c(FFTContext *s, FFTComplex *z)
+{
+    const uint16_t *const perm = s->revtab;
+    const int count = 1 << s->nbits;
+    int src;
+    if (s->tmp_buf) {
+        /* TODO: handle split-radix permute in a more optimal way, probably in-place */
+        for (src = 0; src < count; src++)
+            s->tmp_buf[perm[src]] = z[src];
+        memcpy(z, s->tmp_buf, count * sizeof(FFTComplex));
+        return;
+    }
+    for (src = 0; src < count; src++) {
+        const int dst = perm[src];
+        if (dst < src) { /* no scratch buffer: swap in place, once per pair */
+            FFTComplex held = z[dst];
+            z[dst] = z[src];
+            z[src] = held;
+        }
+    }
+}
+av_cold void ff_fft_end(FFTContext *s) /* release the four tables that ff_fft_init() may allocate */
+{
+    av_freep(&s->tmp_buf);
+    av_freep(&s->exptab1);
+    av_freep(&s->exptab);
+    av_freep(&s->revtab);
+}
+#define sqrthalf (float)M_SQRT1_2
+#define BF(x,y,a,b) { /* butterfly: x = a - b, y = a + b */\
+    x = a - b; /* stored first, so x must not name the same object as a or b */\
+    y = a + b; /* y may alias a or b; the callers in this file rely on that */\
+}
+#define BUTTERFLIES(a0,a1,a2,a3) { /* fold t1,t2,t5,t6 into a0..a3 in place; t1..t6 must be declared by the enclosing function */\
+    BF(t3, t5, t5, t1); /* t3 = t5 - t1, t5 = t5 + t1 */\
+    BF(a2.re, a0.re, a0.re, t5);\
+    BF(a3.im, a1.im, a1.im, t3);\
+    BF(t4, t6, t2, t6); /* t4 = t2 - t6, t6 = t2 + t6 */\
+    BF(a3.re, a1.re, a1.re, t4);\
+    BF(a2.im, a0.im, a0.im, t6);\
+}
+// force loading all the inputs before storing any.
+// this is slightly slower for small data, but avoids store->load aliasing
+// for addresses separated by large powers of 2.
+#define BUTTERFLIES_BIG(a0,a1,a2,a3) { /* same arithmetic as BUTTERFLIES, reading a0/a1 through local copies */\
+    FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im; /* snapshot taken before the first store */\
+    BF(t3, t5, t5, t1);\
+    BF(a2.re, a0.re, r0, t5);\
+    BF(a3.im, a1.im, i1, t3);\
+    BF(t4, t6, t2, t6);\
+    BF(a3.re, a1.re, r1, t4);\
+    BF(a2.im, a0.im, i0, t6);\
+}
+#define TRANSFORM(a0,a1,a2,a3,wre,wim) { /* with w = wre + i*wim: twiddle a2 and a3, then run BUTTERFLIES */\
+    t1 = a2.re * wre + a2.im * wim; /* t1 + i*t2 = a2 * conj(w) */\
+    t2 = a2.im * wre - a2.re * wim;\
+    t5 = a3.re * wre - a3.im * wim; /* t5 + i*t6 = a3 * w */\
+    t6 = a3.im * wre + a3.re * wim;\
+    BUTTERFLIES(a0,a1,a2,a3)\
+}
+#define TRANSFORM_ZERO(a0,a1,a2,a3) { /* TRANSFORM for the twiddle w = 1: a2 and a3 are used unmodified */\
+    t1 = a2.re;\
+    t2 = a2.im;\
+    t5 = a3.re;\
+    t6 = a3.im;\
+    BUTTERFLIES(a0,a1,a2,a3)\
+}
+/* z[0...8n-1], w[1...2n-1]; n must be >= 2 (see the loop note below) */
+#define PASS(name)\
+static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\
+{\
+    FFTSample t1, t2, t3, t4, t5, t6;\
+    int o1 = 2*n; /* o1, o2, o3: element offsets of the 2nd, 3rd and 4th quarter of z */\
+    int o2 = 4*n;\
+    int o3 = 6*n;\
+    const FFTSample *wim = wre+o1; /* second twiddle factor, read from index 2n downwards in the same table */\
+    n--; /* the first pair of columns is handled before the loop */\
+\
+    TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\
+    TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
+    do { /* two columns per iteration */\
+        z += 2;\
+        wre += 2;\
+        wim -= 2;\
+        TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\
+        TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
+    } while(--n); /* n is unsigned: an initial n of 1 would wrap here instead of stopping */\
+}
+PASS(pass)
+#undef BUTTERFLIES
+#define BUTTERFLIES BUTTERFLIES_BIG
+PASS(pass_big)
+#define DECL_FFT(n,n2,n4) /* define fft<n>() from fft<n2>(), fft<n4>() and one pass() call */\
+static void fft##n(FFTComplex *z)\
+{\
+    fft##n2(z); /* transform of size n2 on z[0 .. n2-1] */\
+    fft##n4(z+n4*2); /* transform of size n4 starting at z[2*n4] */\
+    fft##n4(z+n4*3); /* transform of size n4 starting at z[3*n4] */\
+    pass(z,ff_cos_##n,n4/2); /* combine the three results using the size-n cosine table */\
+}
+static void fft4(FFTComplex *z) /* in-place 4-point transform of z[0..3], built from eight BF butterflies */
+{
+    FFTSample t1, t2, t3, t4, t5, t6, t7, t8;
+    BF(t3, t1, z[0].re, z[1].re); /* t1/t3 = sum/difference of the real parts of z[0] and z[1] */
+    BF(t8, t6, z[3].re, z[2].re); /* t6 = z[3].re + z[2].re, t8 = z[3].re - z[2].re */
+    BF(z[2].re, z[0].re, t1, t6);
+    BF(t4, t2, z[0].im, z[1].im); /* t2/t4 = sum/difference of the imaginary parts of z[0] and z[1] */
+    BF(t7, t5, z[2].im, z[3].im); /* t5 = z[2].im + z[3].im, t7 = z[2].im - z[3].im */
+    BF(z[3].im, z[1].im, t4, t8);
+    BF(z[3].re, z[1].re, t3, t7);
+    BF(z[2].im, z[0].im, t2, t5);
+}
+static void fft8(FFTComplex *z) /* in-place 8-point transform of z[0..7] */
+{
+    FFTSample t1, t2, t3, t4, t5, t6, t7, t8;
+    fft4(z); /* 4-point transform of z[0..3] */
+    BF(t1, z[5].re, z[4].re, -z[5].re); /* t1 = z[4].re + z[5].re, z[5].re = z[4].re - z[5].re */
+    BF(t2, z[5].im, z[4].im, -z[5].im);
+    BF(t3, z[7].re, z[6].re, -z[7].re); /* same sum/difference for the pair z[6], z[7] */
+    BF(t4, z[7].im, z[6].im, -z[7].im);
+    BF(t8, t1, t3, t1);
+    BF(t7, t2, t2, t4);
+    BF(z[4].re, z[0].re, z[0].re, t1);
+    BF(z[4].im, z[0].im, z[0].im, t2);
+    BF(z[6].re, z[2].re, z[2].re, t7);
+    BF(z[6].im, z[2].im, z[2].im, t8);
+    TRANSFORM(z[1],z[3],z[5],z[7],sqrthalf,sqrthalf); /* odd entries, both twiddle parts equal to 1/sqrt(2) */
+}
+#if !CONFIG_SMALL /* hand-unrolled fft16(); CONFIG_SMALL builds take the generic DECL_FFT version in the #else branch */
+static void fft16(FFTComplex *z)
+{
+    FFTSample t1, t2, t3, t4, t5, t6;
+    fft8(z); /* z[0..7] */
+    fft4(z+8); /* z[8..11] */
+    fft4(z+12); /* z[12..15] */
+    TRANSFORM_ZERO(z[0],z[4],z[8],z[12]);
+    TRANSFORM(z[2],z[6],z[10],z[14],sqrthalf,sqrthalf);
+    TRANSFORM(z[1],z[5],z[9],z[13],ff_cos_16[1],ff_cos_16[3]);
+    TRANSFORM(z[3],z[7],z[11],z[15],ff_cos_16[3],ff_cos_16[1]);
+}
+#else
+DECL_FFT(16,8,4)
+#endif
+DECL_FFT(32,16,8)
+DECL_FFT(64,32,16)
+DECL_FFT(128,64,32)
+DECL_FFT(256,128,64)
+DECL_FFT(512,256,128)
+#if !CONFIG_SMALL /* the DECL_FFT expansions after this point (1024 and larger) call pass_big instead of pass */
+#define pass pass_big
+#endif
+DECL_FFT(1024,512,256)
+DECL_FFT(2048,1024,512)
+DECL_FFT(4096,2048,1024)
+DECL_FFT(8192,4096,2048)
+DECL_FFT(16384,8192,4096)
+DECL_FFT(32768,16384,8192)
+DECL_FFT(65536,32768,16384)
+static void (*fft_dispatch[])(FFTComplex*) = { /* indexed by nbits - 2: entry 0 is fft4, the last entry is fft65536 (nbits 16) */
+    fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024,
+    fft2048, fft4096, fft8192, fft16384, fft32768, fft65536,
+};
+void ff_fft_calc_c(FFTContext *s, FFTComplex *z) /* run the fixed-size transform matching s->nbits on z */
+{
+    (*fft_dispatch[s->nbits - 2])(z);
+}
diff --git a/apps/codecs/libwmapro/fft.h b/apps/codecs/libwmapro/fft.h
new file mode 100644
index 0000000000..541a46bbba
--- /dev/null
+++ b/apps/codecs/libwmapro/fft.h
@@ -0,0 +1,244 @@
+/*
+ * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef AVCODEC_FFT_H
+#define AVCODEC_FFT_H
+#include <stdint.h>
+//#include "config.h"
+#include "libavutil/mem.h"
+#include "avfft.h"
+/* DECLARE_ALIGNED - Taken from libavutil/internal.h */
+#define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (n))) v
+#define DECLARE_ALIGNED_16(t,v)     DECLARE_ALIGNED(16,t,v)
+/* FFT computation */
+struct FFTContext {
+    int nbits; /* log2 of the transform size, as passed to ff_fft_init() */
+    int inverse; /* inverse flag, as passed to ff_fft_init() */
+    uint16_t *revtab; /* permutation table applied by ff_fft_permute_c() */
+    FFTComplex *exptab;
+    FFTComplex *exptab1; /* only used by SSE code */
+    FFTComplex *tmp_buf; /* scratch for the out-of-place permute; when NULL, ff_fft_permute_c() swaps in place */
+    int mdct_size; /* size of MDCT (i.e. number of input data * 2) */
+    int mdct_bits; /* n = 2^nbits */
+    /* pre/post rotation tables */
+    FFTSample *tcos;
+    FFTSample *tsin;
+    void (*fft_permute)(struct FFTContext *s, FFTComplex *z); /* implementation behind ff_fft_permute() */
+    void (*fft_calc)(struct FFTContext *s, FFTComplex *z); /* implementation behind ff_fft_calc() */
+    void (*imdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input); /* implementation behind ff_imdct_calc() */
+    void (*imdct_half)(struct FFTContext *s, FFTSample *output, const FFTSample *input); /* implementation behind ff_imdct_half() */
+    void (*mdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input); /* implementation behind ff_mdct_calc() */
+    int split_radix;
+    int permutation; /* presumably one of the FF_MDCT_PERM_* values below - TODO confirm against mdct.c */
+#define FF_MDCT_PERM_NONE       0
+#define FF_MDCT_PERM_INTERLEAVE 1
+};
+#if CONFIG_HARDCODED_TABLES
+#define COSTABLE_CONST const
+#define SINTABLE_CONST const
+#define SINETABLE_CONST const
+#else
+#define COSTABLE_CONST
+#define SINTABLE_CONST
+#define SINETABLE_CONST
+#endif
+#define COSTABLE(size) \
+    COSTABLE_CONST DECLARE_ALIGNED(16, FFTSample, ff_cos_##size)[size/2]
+#define SINTABLE(size) \
+    SINTABLE_CONST DECLARE_ALIGNED(16, FFTSample, ff_sin_##size)[size/2]
+#define SINETABLE(size) \
+    SINETABLE_CONST DECLARE_ALIGNED(16, float, ff_sine_##size)[size]
+extern COSTABLE(16);
+extern COSTABLE(32);
+extern COSTABLE(64);
+extern COSTABLE(128);
+extern COSTABLE(256);
+extern COSTABLE(512);
+extern COSTABLE(1024);
+extern COSTABLE(2048);
+extern COSTABLE(4096);
+extern COSTABLE(8192);
+extern COSTABLE(16384);
+extern COSTABLE(32768);
+extern COSTABLE(65536);
+//extern COSTABLE_CONST FFTSample* const ff_cos_tabs[17];
+/**
+ * Initializes the cosine table in ff_cos_tabs[index]
+ * \param index index in ff_cos_tabs array of the table to initialize
+ */
+void ff_init_ff_cos_tabs(int index);
+extern SINTABLE(16);
+extern SINTABLE(32);
+extern SINTABLE(64);
+extern SINTABLE(128);
+extern SINTABLE(256);
+extern SINTABLE(512);
+extern SINTABLE(1024);
+extern SINTABLE(2048);
+extern SINTABLE(4096);
+extern SINTABLE(8192);
+extern SINTABLE(16384);
+extern SINTABLE(32768);
+extern SINTABLE(65536);
+/**
+ * Sets up a complex FFT.
+ * @param nbits           log2 of the length of the input array
+ * @param inverse         if 0 perform the forward transform, if 1 perform the inverse
+ */
+int ff_fft_init(FFTContext *s, int nbits, int inverse);
+void ff_fft_permute_c(FFTContext *s, FFTComplex *z);
+void ff_fft_calc_c(FFTContext *s, FFTComplex *z);
+void ff_fft_init_altivec(FFTContext *s);
+void ff_fft_init_mmx(FFTContext *s);
+void ff_fft_init_arm(FFTContext *s);
+/**
+ * Do the permutation needed BEFORE calling ff_fft_calc().
+ */
+static inline void ff_fft_permute(FFTContext *s, FFTComplex *z)
+{
+    s->fft_permute(s, z);
+}
+/**
+ * Do a complex FFT with the parameters defined in ff_fft_init(). The
+ * input data must be permuted before. No 1.0/sqrt(n) normalization is done.
+ */
+static inline void ff_fft_calc(FFTContext *s, FFTComplex *z)
+{
+    s->fft_calc(s, z);
+}
+void ff_fft_end(FFTContext *s);
+/* MDCT computation */
+static inline void ff_imdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input) /* forwards to the imdct_calc implementation stored in s */
+{
+    s->imdct_calc(s, output, input);
+}
+static inline void ff_imdct_half(FFTContext *s, FFTSample *output, const FFTSample *input) /* forwards to the imdct_half implementation stored in s */
+{
+    s->imdct_half(s, output, input);
+}
+static inline void ff_mdct_calc(FFTContext *s, FFTSample *output, /* forwards to the mdct_calc implementation stored in s */
+                                const FFTSample *input)
+{
+    s->mdct_calc(s, output, input);
+}
+/**
+ * Generate a Kaiser-Bessel Derived Window.
+ * @param   window  pointer to half window
+ * @param   alpha   determines window shape
+ * @param   n       size of half window
+ */
+void ff_kbd_window_init(float *window, float alpha, int n);
+/**
+ * Generate a sine window.
+ * @param   window  pointer to half window
+ * @param   n       size of half window
+ */
+void ff_sine_window_init(float *window, int n);
+/**
+ * initialize the specified entry of ff_sine_windows
+ */
+void ff_init_ff_sine_windows(int index);
+extern SINETABLE(  32);
+extern SINETABLE(  64);
+extern SINETABLE( 128);
+extern SINETABLE( 256);
+extern SINETABLE( 512);
+extern SINETABLE(1024);
+extern SINETABLE(2048);
+extern SINETABLE(4096);
+extern SINETABLE_CONST float * const ff_sine_windows[13];
+int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale);
+void ff_imdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input);
+void ff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input);
+void ff_mdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input);
+void ff_mdct_end(FFTContext *s);
+/* Real Discrete Fourier Transform */
+struct RDFTContext {
+    int nbits;
+    int inverse;
+    int sign_convention;
+    /* pre/post rotation tables */
+    const FFTSample *tcos;
+    SINTABLE_CONST FFTSample *tsin; /* const-qualified only when CONFIG_HARDCODED_TABLES is set */
+    FFTContext fft; /* embedded complex FFT context */
+    void (*rdft_calc)(struct RDFTContext *s, FFTSample *z); /* implementation behind ff_rdft_calc() */
+};
+/**
+ * Sets up a real FFT.
+ * @param nbits           log2 of the length of the input array
+ * @param trans           the type of transform
+ */
+int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans);
+void ff_rdft_end(RDFTContext *s);
+void ff_rdft_init_arm(RDFTContext *s);
+static av_always_inline void ff_rdft_calc(RDFTContext *s, FFTSample *data)
+{
+    s->rdft_calc(s, data);
+}
+/* Discrete Cosine Transform */
+struct DCTContext {
+    int nbits;
+    int inverse;
+    RDFTContext rdft; /* embedded real-FFT context */
+    const float *costab;
+    FFTSample *csc2;
+    void (*dct_calc)(struct DCTContext *s, FFTSample *data); /* presumably the implementation behind ff_dct_calc() - TODO confirm in dct.c */
+};
+/**
+ * Sets up DCT.
+ * @param nbits           size of the input array:
+ *                        (1 << nbits)     for DCT-II, DCT-III and DST-I
+ *                        (1 << nbits) + 1 for DCT-I
+ *
+ * @note the first element of the input of DST-I is ignored
+ */
+int  ff_dct_init(DCTContext *s, int nbits, enum DCTTransformType type);
+void ff_dct_calc(DCTContext *s, FFTSample *data);
+void ff_dct_end (DCTContext *s);
+#endif /* AVCODEC_FFT_H */
diff --git a/apps/codecs/libwmapro/get_bits.h b/apps/codecs/libwmapro/get_bits.h
new file mode 100644
index 0000000000..ca84ea61c0
--- /dev/null
+++ b/apps/codecs/libwmapro/get_bits.h
@@ -0,0 +1,692 @@
+/*
+ * copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/**
+ * @file libavcodec/get_bits.h
+ * bitstream reader API header.
+ */
+#ifndef AVCODEC_GET_BITS_H
+#define AVCODEC_GET_BITS_H
+#include <assert.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include "libavutil/bswap.h"
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/log.h"
+#include "mathops.h"
+#if defined(ALT_BITSTREAM_READER_LE) && !defined(ALT_BITSTREAM_READER)
+#   define ALT_BITSTREAM_READER
+#endif
+#if !defined(LIBMPEG2_BITSTREAM_READER) && !defined(A32_BITSTREAM_READER) && !defined(ALT_BITSTREAM_READER)
+#   if ARCH_ARM && !HAVE_FAST_UNALIGNED
+#       define A32_BITSTREAM_READER
+#   else
+#       define ALT_BITSTREAM_READER
+//#define LIBMPEG2_BITSTREAM_READER
+//#define A32_BITSTREAM_READER
+#   endif
+#endif
+/* bit input */
+/* buffer, buffer_end and size_in_bits must be present and used by every reader */
+/**
+ * Bitstream reader state. The fields between buffer_end and size_in_bits
+ * depend on which reader implementation was selected at compile time.
+ */
+typedef struct GetBitContext {
+    const uint8_t *buffer, *buffer_end; /* start of the data and buffer + byte size, set by init_get_bits() */
+#ifdef ALT_BITSTREAM_READER
+    int index;              /* current read position, in bits from buffer */
+#elif defined LIBMPEG2_BITSTREAM_READER
+    uint8_t *buffer_ptr;    /* next bytes to load; advanced 2 bytes per refill */
+    uint32_t cache;
+    int bit_count;
+#elif defined A32_BITSTREAM_READER
+    uint32_t *buffer_ptr;   /* next 32-bit word to load */
+    uint32_t cache0;
+    uint32_t cache1;
+    int bit_count;
+#endif
+    int size_in_bits;       /* bit_size given to init_get_bits() */
+} GetBitContext;
+/* element type of the VLC lookup tables */
+#define VLC_TYPE int16_t
+/**
+ * Variable-length-code lookup table as consumed by GET_VLC()/get_vlc2().
+ */
+typedef struct VLC {
+    int bits;
+    VLC_TYPE (*table)[2]; ///< code, bits
+    int table_size, table_allocated; /* table_allocated is preset by INIT_VLC_STATIC for static tables */
+} VLC;
+/**
+ * Table entry read by GET_RL_VLC().
+ */
+typedef struct RL_VLC_ELEM {
+    int16_t level;  /* decoded level, or the offset added to the next index when len is negative */
+    int8_t len;     /* number of bits to skip; negative means -len more bits select a second entry */
+    uint8_t run;
+} RL_VLC_ELEM;
+/* Bitstream reader API docs:
+name
+    arbitrary name which is used as prefix for the internal variables
+gb
+    getbitcontext
+OPEN_READER(name, gb)
+    loads gb into local variables
+CLOSE_READER(name, gb)
+    stores local vars in gb
+UPDATE_CACHE(name, gb)
+    refills the internal cache from the bitstream
+    after this call at least MIN_CACHE_BITS will be available,
+GET_CACHE(name, gb)
+    will output the contents of the internal cache, next bit is MSB of 32 or 64 bit (FIXME 64bit)
+SHOW_UBITS(name, gb, num)
+    will return the next num bits
+SHOW_SBITS(name, gb, num)
+    will return the next num bits and do sign extension
+SKIP_BITS(name, gb, num)
+    will skip over the next num bits
+    note, this is equivalent to SKIP_CACHE; SKIP_COUNTER
+SKIP_CACHE(name, gb, num)
+    will remove the next num bits from the cache (note SKIP_COUNTER MUST be called before UPDATE_CACHE / CLOSE_READER)
+SKIP_COUNTER(name, gb, num)
+    will increment the internal bit counter (see SKIP_CACHE & SKIP_BITS)
+LAST_SKIP_CACHE(name, gb, num)
+    will remove the next num bits from the cache if it is needed for UPDATE_CACHE otherwise it will do nothing
+LAST_SKIP_BITS(name, gb, num)
+    is equivalent to LAST_SKIP_CACHE; SKIP_COUNTER
+for examples see get_bits, show_bits, skip_bits, get_vlc
+*/
+#ifdef ALT_BITSTREAM_READER
+/*
+ * ALT reader: the only persistent state is the bit index (gb)->index.
+ * OPEN_READER copies it into name##_index and starts with an empty cache,
+ * UPDATE_CACHE reloads 32 bits starting at byte (index>>3) and shifts away
+ * the (index&7) bits already consumed, SKIP_COUNTER advances the index and
+ * CLOSE_READER writes it back. LAST_SKIP_CACHE is a no-op because the cache
+ * is rebuilt from the index on every UPDATE_CACHE.
+ */
+#   define MIN_CACHE_BITS 25
+#   define OPEN_READER(name, gb)\
+        unsigned int name##_index= (gb)->index;\
+        int name##_cache= 0;\
+#   define CLOSE_READER(name, gb)\
+        (gb)->index= name##_index;\
+# ifdef ALT_BITSTREAM_READER_LE
+#   define UPDATE_CACHE(name, gb)\
+        name##_cache= AV_RL32( ((const uint8_t *)(gb)->buffer)+(name##_index>>3) ) >> (name##_index&0x07);\
+#   define SKIP_CACHE(name, gb, num)\
+        name##_cache >>= (num);
+# else
+#   define UPDATE_CACHE(name, gb)\
+        name##_cache= AV_RB32( ((const uint8_t *)(gb)->buffer)+(name##_index>>3) ) << (name##_index&0x07);\
+#   define SKIP_CACHE(name, gb, num)\
+        name##_cache <<= (num);
+# endif
+// FIXME name?
+#   define SKIP_COUNTER(name, gb, num)\
+        name##_index += (num);\
+#   define SKIP_BITS(name, gb, num)\
+        {\
+            SKIP_CACHE(name, gb, num)\
+            SKIP_COUNTER(name, gb, num)\
+        }\
+#   define LAST_SKIP_BITS(name, gb, num) SKIP_COUNTER(name, gb, num)
+#   define LAST_SKIP_CACHE(name, gb, num) ;
+# ifdef ALT_BITSTREAM_READER_LE
+#   define SHOW_UBITS(name, gb, num)\
+        zero_extend(name##_cache, num)
+#   define SHOW_SBITS(name, gb, num)\
+        sign_extend(name##_cache, num)
+# else
+#   define SHOW_UBITS(name, gb, num)\
+        NEG_USR32(name##_cache, num)
+#   define SHOW_SBITS(name, gb, num)\
+        NEG_SSR32(name##_cache, num)
+# endif
+#   define GET_CACHE(name, gb)\
+        ((uint32_t)name##_cache)
+/** Returns the current read position in bits (ALT reader: the stored index). */
+static inline int get_bits_count(const GetBitContext *s){
+    return s->index;
+}
+/** Moves the read position by n bits (ALT reader: adds n to the index, no bounds check). */
+static inline void skip_bits_long(GetBitContext *s, int n){
+    s->index += n;
+}
+#elif defined LIBMPEG2_BITSTREAM_READER
+//libmpeg2 like reader
+/*
+ * State is a cache word, a bit counter and a byte pointer. UPDATE_CACHE adds
+ * one 16-bit big-endian word to the cache whenever the counter has become
+ * non-negative, then advances the pointer by 2 bytes and lowers the counter
+ * by 16. SKIP_BITS shifts the cache left and raises the counter.
+ */
+#   define MIN_CACHE_BITS 17
+#   define OPEN_READER(name, gb)\
+        int name##_bit_count=(gb)->bit_count;\
+        int name##_cache= (gb)->cache;\
+        uint8_t * name##_buffer_ptr=(gb)->buffer_ptr;\
+#   define CLOSE_READER(name, gb)\
+        (gb)->bit_count= name##_bit_count;\
+        (gb)->cache= name##_cache;\
+        (gb)->buffer_ptr= name##_buffer_ptr;\
+#   define UPDATE_CACHE(name, gb)\
+    if(name##_bit_count >= 0){\
+        name##_cache+= AV_RB16(name##_buffer_ptr) << name##_bit_count; \
+        name##_buffer_ptr+=2;\
+        name##_bit_count-= 16;\
+    }\
+#   define SKIP_CACHE(name, gb, num)\
+        name##_cache <<= (num);\
+#   define SKIP_COUNTER(name, gb, num)\
+        name##_bit_count += (num);\
+#   define SKIP_BITS(name, gb, num)\
+        {\
+            SKIP_CACHE(name, gb, num)\
+            SKIP_COUNTER(name, gb, num)\
+        }\
+#   define LAST_SKIP_BITS(name, gb, num) SKIP_BITS(name, gb, num)
+#   define LAST_SKIP_CACHE(name, gb, num) SKIP_CACHE(name, gb, num)
+#   define SHOW_UBITS(name, gb, num)\
+        NEG_USR32(name##_cache, num)
+#   define SHOW_SBITS(name, gb, num)\
+        NEG_SSR32(name##_cache, num)
+#   define GET_CACHE(name, gb)\
+        ((uint32_t)name##_cache)
+/**
+ * Returns the current read position in bits (libmpeg2 reader), derived from
+ * how far buffer_ptr is past buffer and corrected by bit_count.
+ */
+static inline int get_bits_count(const GetBitContext *s){
+    return (s->buffer_ptr - s->buffer)*8 - 16 + s->bit_count;
+}
+/**
+ * Moves the read position by n bits (libmpeg2 reader) and rebuilds the cache
+ * at the new position. Also used by init_get_bits() with n = 0 to prime the
+ * cache.
+ */
+static inline void skip_bits_long(GetBitContext *s, int n){
+    OPEN_READER(re, s)
+    re_bit_count += n;
+    /* advance by whole 16-bit words, keep the remainder in the counter */
+    re_buffer_ptr += 2*(re_bit_count>>4);
+    re_bit_count &= 15;
+    /* reload from the two bytes just before the pointer */
+    re_cache = ((re_buffer_ptr[-2]<<8) + re_buffer_ptr[-1]) << (16+re_bit_count);
+    UPDATE_CACHE(re, s)
+    CLOSE_READER(re, s)
+}
+#elif defined A32_BITSTREAM_READER
+/*
+ * A32 reader: state is two 32-bit cache words, a bit counter and a pointer
+ * to 32-bit words. UPDATE_CACHE merges the next big-endian word into
+ * cache0/cache1 when the counter is positive, then advances the pointer and
+ * lowers the counter by 32. SKIP_CACHE shifts the cache0:cache1 pair left.
+ */
+#   define MIN_CACHE_BITS 32
+#   define OPEN_READER(name, gb)\
+        int name##_bit_count=(gb)->bit_count;\
+        uint32_t name##_cache0= (gb)->cache0;\
+        uint32_t name##_cache1= (gb)->cache1;\
+        uint32_t * name##_buffer_ptr=(gb)->buffer_ptr;\
+#   define CLOSE_READER(name, gb)\
+        (gb)->bit_count= name##_bit_count;\
+        (gb)->cache0= name##_cache0;\
+        (gb)->cache1= name##_cache1;\
+        (gb)->buffer_ptr= name##_buffer_ptr;\
+#   define UPDATE_CACHE(name, gb)\
+    if(name##_bit_count > 0){\
+        const uint32_t next= be2me_32( *name##_buffer_ptr );\
+        name##_cache0 |= NEG_USR32(next,name##_bit_count);\
+        name##_cache1 |= next<<name##_bit_count;\
+        name##_buffer_ptr++;\
+        name##_bit_count-= 32;\
+    }\
+#if ARCH_X86
+#   define SKIP_CACHE(name, gb, num)\
+        __asm__(\
+            "shldl %2, %1, %0          \n\t"\
+            "shll %2, %1               \n\t"\
+            : "+r" (name##_cache0), "+r" (name##_cache1)\
+            : "Ic" ((uint8_t)(num))\
+           );
+#else
+#   define SKIP_CACHE(name, gb, num)\
+        name##_cache0 <<= (num);\
+        name##_cache0 |= NEG_USR32(name##_cache1,num);\
+        name##_cache1 <<= (num);
+#endif
+#   define SKIP_COUNTER(name, gb, num)\
+        name##_bit_count += (num);\
+#   define SKIP_BITS(name, gb, num)\
+        {\
+            SKIP_CACHE(name, gb, num)\
+            SKIP_COUNTER(name, gb, num)\
+        }\
+#   define LAST_SKIP_BITS(name, gb, num) SKIP_BITS(name, gb, num)
+#   define LAST_SKIP_CACHE(name, gb, num) SKIP_CACHE(name, gb, num)
+#   define SHOW_UBITS(name, gb, num)\
+        NEG_USR32(name##_cache0, num)
+#   define SHOW_SBITS(name, gb, num)\
+        NEG_SSR32(name##_cache0, num)
+#   define GET_CACHE(name, gb)\
+        (name##_cache0)
+/**
+ * Returns the current read position in bits (A32 reader), derived from the
+ * byte distance between buffer_ptr and buffer and corrected by bit_count.
+ */
+static inline int get_bits_count(const GetBitContext *s){
+    return ((uint8_t*)s->buffer_ptr - s->buffer)*8 - 32 + s->bit_count;
+}
+/**
+ * Moves the read position by n bits (A32 reader) and rebuilds both cache
+ * words at the new position. Also used by init_get_bits() with n = 0 to
+ * prime the cache.
+ */
+static inline void skip_bits_long(GetBitContext *s, int n){
+    OPEN_READER(re, s)
+    re_bit_count += n;
+    /* advance by whole 32-bit words, keep the remainder in the counter */
+    re_buffer_ptr += re_bit_count>>5;
+    re_bit_count &= 31;
+    /* reload from the word just before the pointer */
+    re_cache0 = be2me_32( re_buffer_ptr[-1] ) << re_bit_count;
+    re_cache1 = 0;
+    UPDATE_CACHE(re, s)
+    CLOSE_READER(re, s)
+}
+#endif
+/**
+ * read mpeg1 dc style vlc (sign bit + mantissa with no MSB).
+ * if MSB not set it is negative
+ * @param n length in bits
+ * @return the decoded signed value; the reader advances by n bits
+ * @author BERO
+ */
+static inline int get_xbits(GetBitContext *s, int n){
+    register int sign;
+    register int32_t cache;
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+    cache = GET_CACHE(re,s);
+    /* with an arithmetic right shift: -1 when the leading bit is clear, else 0 */
+    sign=(~cache)>>31;
+    LAST_SKIP_BITS(re, s, n)
+    CLOSE_READER(re, s)
+    return (NEG_USR32(sign ^ cache, n) ^ sign) - sign;
+}
+/**
+ * reads the next n bits as a sign-extended value and advances the reader
+ * by n bits.
+ */
+static inline int get_sbits(GetBitContext *s, int n){
+    register int tmp;
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+    tmp= SHOW_SBITS(re, s, n);
+    LAST_SKIP_BITS(re, s, n)
+    CLOSE_READER(re, s)
+    return tmp;
+}
+/**
+ * reads 1-17 bits.
+ * Note, the alt bitstream reader can read up to 25 bits, but the libmpeg2 reader can't
+ * @param n number of bits to read
+ * @return the bits as an unsigned value; the reader advances by n bits
+ */
+static inline unsigned int get_bits(GetBitContext *s, int n){
+    register int tmp;
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+    tmp= SHOW_UBITS(re, s, n);
+    LAST_SKIP_BITS(re, s, n)
+    CLOSE_READER(re, s)
+    return tmp;
+}
+/**
+ * shows 1-17 bits.
+ * Note, the alt bitstream reader can read up to 25 bits, but the libmpeg2 reader can't
+ * @param n number of bits to peek
+ * @return the next n bits as an unsigned value; the reader state in s is
+ *         not written back, so the read position does not move
+ */
+static inline unsigned int show_bits(GetBitContext *s, int n){
+    register int tmp;
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+    tmp= SHOW_UBITS(re, s, n);
+//    CLOSE_READER(re, s)
+    return tmp;
+}
+/**
+ * advances the reader by n bits without returning them.
+ */
+static inline void skip_bits(GetBitContext *s, int n){
+ //Note gcc seems to optimize this to s->index+=n for the ALT_READER :))
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+    LAST_SKIP_BITS(re, s, n)
+    CLOSE_READER(re, s)
+}
+/**
+ * reads a single bit and advances the reader by one bit.
+ * The ALT reader picks the bit straight out of the byte that holds it;
+ * the other readers go through get_bits().
+ * @return 0 or 1
+ */
+static inline unsigned int get_bits1(GetBitContext *s){
+#ifdef ALT_BITSTREAM_READER
+    const unsigned int pos= s->index;
+    const uint8_t byte= s->buffer[ pos>>3 ];
+    s->index= pos + 1;
+#ifdef ALT_BITSTREAM_READER_LE
+    /* little-endian bit order: bit 0 of the byte comes first */
+    return (byte >> (pos&0x07)) & 1;
+#else
+    /* big-endian bit order: bit 7 of the byte comes first */
+    return (byte >> (7 - (pos&0x07))) & 1;
+#endif
+#else
+    return get_bits(s, 1);
+#endif
+}
+/** shows the next bit without advancing the reader; see show_bits(). */
+static inline unsigned int show_bits1(GetBitContext *s){
+    return show_bits(s, 1);
+}
+/** advances the reader by one bit; see skip_bits(). */
+static inline void skip_bits1(GetBitContext *s){
+    skip_bits(s, 1);
+}
+/**
+ * reads 0-32 bits.
+ * @param s bitstream reader; its read position advances by n bits
+ * @param n number of bits to read (0..32)
+ * @return the bits as an unsigned value, 0 when n is 0
+ */
+static inline unsigned int get_bits_long(GetBitContext *s, int n){
+    /* get_bits() is only specified for n >= 1; NOTE(review): SHOW_UBITS
+     * presumably shifts by 32-n, which would be out of range for n == 0 */
+    if(!n) return 0;
+    if(n<=MIN_CACHE_BITS) return get_bits(s, n);
+    else{
+        /* accumulate in unsigned: with n == 32 the first 16 bits land in the
+         * top half of the word and may not fit in an int */
+#ifdef ALT_BITSTREAM_READER_LE
+        unsigned int ret= get_bits(s, 16);
+        return ret | ((unsigned int)get_bits(s, n-16) << 16);
+#else
+        unsigned int ret= (unsigned int)get_bits(s, 16) << (n-16);
+        return ret | get_bits(s, n-16);
+#endif
+    }
+}
+#if 0
+/* mt : switched those off */
+/* NOTE(review): presumably disabled because sign_extend() is not wanted or
+ * not available in this port -- confirm before re-enabling */
+/**
+ * reads 0-32 bits as a signed integer.
+ */
+static inline int get_sbits_long(GetBitContext *s, int n) {
+    return sign_extend(get_bits_long(s, n), n);
+}
+#endif
+/**
+ * shows 0-32 bits.
+ * @param s bitstream reader; its read position is left unchanged
+ * @param n number of bits to peek (0..32)
+ * @return the next n bits as an unsigned value, 0 when n is 0
+ */
+static inline unsigned int show_bits_long(GetBitContext *s, int n){
+    /* show_bits() is only specified for n >= 1 */
+    if(!n) return 0;
+    if(n<=MIN_CACHE_BITS) return show_bits(s, n);
+    else{
+        /* read from a copy so the caller's position does not move */
+        GetBitContext gb= *s;
+        return get_bits_long(&gb, n);
+    }
+}
+/**
+ * reads one bit that is expected to be set.
+ * @param msg text appended to the log message when the bit is clear
+ * @return the bit that was read
+ */
+static inline int check_marker(GetBitContext *s, const char *msg)
+{
+    const int marker= get_bits1(s);
+    if(marker)
+        return marker;
+    av_log(NULL, AV_LOG_INFO, "Marker bit missing %s\n", msg);
+    return marker;
+}
+/**
+ * init GetBitContext.
+ * @param buffer bitstream buffer, must be FF_INPUT_BUFFER_PADDING_SIZE bytes larger than the actual read bits
+ * because some optimized bitstream readers read 32 or 64 bit at once and could read over the end
+ * @param bit_size the size of the buffer in bits
+ *
+ * A negative bit_size, or one so large that rounding it up to whole bytes
+ * would overflow an int, initializes an empty reader (NULL buffer, size 0).
+ *
+ * While GetBitContext stores the buffer size, for performance reasons you are
+ * responsible for checking for the buffer end yourself (take advantage of the padding)!
+ */
+static inline void init_get_bits(GetBitContext *s,
+                   const uint8_t *buffer, int bit_size)
+{
+    int buffer_size;
+    /* validate before rounding up so that bit_size+7 cannot overflow */
+    if(bit_size < 0 || bit_size > INT_MAX - 7) {
+        buffer_size = bit_size = 0;
+        buffer = NULL;
+    } else {
+        buffer_size = (bit_size+7)>>3;
+    }
+    s->buffer= buffer;
+    s->size_in_bits= bit_size;
+    s->buffer_end= buffer + buffer_size;
+#ifdef ALT_BITSTREAM_READER
+    s->index=0;
+#elif defined LIBMPEG2_BITSTREAM_READER
+    /* start from the 2-byte aligned address at or below buffer and skip the
+     * leading byte through bit_count; skip_bits_long(s, 0) primes the cache */
+    s->buffer_ptr = (uint8_t*)((intptr_t)buffer&(~1));
+    s->bit_count = 16 + 8*((intptr_t)buffer&1);
+    skip_bits_long(s, 0);
+#elif defined A32_BITSTREAM_READER
+    /* same idea with 4-byte alignment */
+    s->buffer_ptr = (uint32_t*)((intptr_t)buffer&(~3));
+    s->bit_count = 32 + 8*((intptr_t)buffer&3);
+    skip_bits_long(s, 0);
+#endif
+}
+/**
+ * advances the reader to the next multiple of 8 bits; does nothing when the
+ * position is already byte aligned.
+ */
+static inline void align_get_bits(GetBitContext *s)
+{
+    const int past_boundary= get_bits_count(s) & 7;
+    if(past_boundary)
+        skip_bits(s, 8 - past_boundary);
+}
+/* init_vlc(): init_vlc_sparse() without a symbols table (NULL, 0, 0 is
+ * passed for symbols, symbols_wrap and symbols_size). */
+#define init_vlc(vlc, nb_bits, nb_codes,\
+                 bits, bits_wrap, bits_size,\
+                 codes, codes_wrap, codes_size,\
+                 flags)\
+        init_vlc_sparse(vlc, nb_bits, nb_codes,\
+                 bits, bits_wrap, bits_size,\
+                 codes, codes_wrap, codes_size,\
+                 NULL, 0, 0, flags)
+int init_vlc_sparse(VLC *vlc, int nb_bits, int nb_codes,
+             const void *bits, int bits_wrap, int bits_size,
+             const void *codes, int codes_wrap, int codes_size,
+             const void *symbols, int symbols_wrap, int symbols_size,
+             int flags);
+#define INIT_VLC_LE         2
+#define INIT_VLC_USE_NEW_STATIC 4
+void free_vlc(VLC *vlc);
+/* INIT_VLC_STATIC(): declares a function-local static table of static_size
+ * entries, attaches it to vlc and calls init_vlc() with
+ * INIT_VLC_USE_NEW_STATIC. Expands to a braced block. */
+#define INIT_VLC_STATIC(vlc, bits, a,b,c,d,e,f,g, static_size)\
+{\
+    static VLC_TYPE table[static_size][2];\
+    (vlc)->table= table;\
+    (vlc)->table_allocated= static_size;\
+    init_vlc(vlc, bits, a,b,c,d,e,f,g, INIT_VLC_USE_NEW_STATIC);\
+}
+/**
+ * Decodes one VLC symbol into code with up to max_depth table lookups.
+ * Each lookup reads table[index][0] (code) and table[index][1] (length n).
+ * A negative n means the entry points to a sub-table: the bits used so far
+ * are skipped, -n further bits are read and added to code to form the next
+ * index. The final n bits are skipped with SKIP_BITS.
+ * The caller must have an open reader (OPEN_READER + UPDATE_CACHE) under
+ * the given name.
+ *
+ * If the vlc code is invalid and max_depth=1, then no bits will be removed.
+ * If the vlc code is invalid and max_depth>1, then the number of bits removed
+ * is undefined.
+ */
+#define GET_VLC(code, name, gb, table, bits, max_depth)\
+{\
+    int n, nb_bits;\
+    unsigned int index;\
+\
+    index= SHOW_UBITS(name, gb, bits);\
+    code = table[index][0];\
+    n    = table[index][1];\
+\
+    if(max_depth > 1 && n < 0){\
+        LAST_SKIP_BITS(name, gb, bits)\
+        UPDATE_CACHE(name, gb)\
+\
+        nb_bits = -n;\
+\
+        index= SHOW_UBITS(name, gb, nb_bits) + code;\
+        code = table[index][0];\
+        n    = table[index][1];\
+        if(max_depth > 2 && n < 0){\
+            LAST_SKIP_BITS(name, gb, nb_bits)\
+            UPDATE_CACHE(name, gb)\
+\
+            nb_bits = -n;\
+\
+            index= SHOW_UBITS(name, gb, nb_bits) + code;\
+            code = table[index][0];\
+            n    = table[index][1];\
+        }\
+    }\
+    SKIP_BITS(name, gb, n)\
+}
+/**
+ * Decodes one run/level pair from a table of RL_VLC_ELEM entries.
+ * The first lookup uses bits bits; if the entry's len is negative and
+ * max_depth > 1, those bits are skipped and -len further bits plus the
+ * entry's level select a second entry. level and run are taken from the
+ * final entry and its len bits are skipped.
+ * need_update controls whether the cache is refilled before the second
+ * lookup. The caller must have an open reader under the given name.
+ */
+#define GET_RL_VLC(level, run, name, gb, table, bits, max_depth, need_update)\
+{\
+    int n, nb_bits;\
+    unsigned int index;\
+\
+    index= SHOW_UBITS(name, gb, bits);\
+    level = table[index].level;\
+    n     = table[index].len;\
+\
+    if(max_depth > 1 && n < 0){\
+        SKIP_BITS(name, gb, bits)\
+        if(need_update){\
+            UPDATE_CACHE(name, gb)\
+        }\
+\
+        nb_bits = -n;\
+\
+        index= SHOW_UBITS(name, gb, nb_bits) + level;\
+        level = table[index].level;\
+        n     = table[index].len;\
+    }\
+    run= table[index].run;\
+    SKIP_BITS(name, gb, n)\
+}
+/**
+ * parses a vlc code, faster than get_vlc()
+ * @param bits is the number of bits which will be read at once, must be
+ *             identical to nb_bits in init_vlc()
+ * @param max_depth is the number of times bits bits must be read to completely
+ *                  read the longest vlc code
+ *                  = (max_vlc_length + bits - 1) / bits
+ * @return the code found in table; the reader advances past the code
+ */
+static av_always_inline int get_vlc2(GetBitContext *s, VLC_TYPE (*table)[2],
+                                  int bits, int max_depth)
+{
+    int code;
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+    GET_VLC(code, re, s, table, bits, max_depth)
+    CLOSE_READER(re, s)
+    return code;
+}
+//#define TRACE
+#ifdef TRACE
+/**
+ * logs the low n bits of bits, most significant first, then pads the
+ * output with spaces up to column 24.
+ */
+static inline void print_bin(int bits, int n){
+    int shift= n;
+    int column= n;
+    while(shift-- > 0)
+        av_log(NULL, AV_LOG_DEBUG, "%d", (bits>>shift)&1);
+    while(column < 24){
+        av_log(NULL, AV_LOG_DEBUG, " ");
+        column++;
+    }
+}
+/**
+ * get_bits() that also logs the value read, its bit pattern, the bit
+ * position it was read from and the call site (file, func, line).
+ */
+static inline int get_bits_trace(GetBitContext *s, int n, char *file, const char *func, int line){
+    int r= get_bits(s, n);
+    print_bin(r, n);
+    av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d bit @%5d in %s %s:%d\n", r, n, r, get_bits_count(s)-n, file, func, line);
+    return r;
+}
+/**
+ * get_vlc2() that also logs the bits consumed, the code length, the decoded
+ * value, the starting bit position and the call site.
+ */
+static inline int get_vlc_trace(GetBitContext *s, VLC_TYPE (*table)[2], int bits, int max_depth, char *file, const char *func, int line){
+    /* peek 24 bits first so the consumed prefix can be printed afterwards */
+    int show= show_bits(s, 24);
+    int pos= get_bits_count(s);
+    int r= get_vlc2(s, table, bits, max_depth);
+    int len= get_bits_count(s) - pos;
+    int bits2= show>>(24-len);
+    print_bin(bits2, len);
+    av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d vlc @%5d in %s %s:%d\n", bits2, len, r, pos, file, func, line);
+    return r;
+}
+/**
+ * get_xbits() that also logs the raw bits, the decoded value, the bit
+ * position and the call site.
+ */
+static inline int get_xbits_trace(GetBitContext *s, int n, char *file, const char *func, int line){
+    /* peek the raw bits before get_xbits() consumes them */
+    int show= show_bits(s, n);
+    int r= get_xbits(s, n);
+    print_bin(show, n);
+    av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d xbt @%5d in %s %s:%d\n", show, n, r, get_bits_count(s)-n, file, func, line);
+    return r;
+}
+/* With TRACE defined, every later use of the readers below is redirected to
+ * the tracing wrappers, passing the call site along. get_bits1() is traced
+ * as a 1-bit get_bits(). */
+#define get_bits(s, n)  get_bits_trace(s, n, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+#define get_bits1(s)    get_bits_trace(s, 1, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+#define get_xbits(s, n) get_xbits_trace(s, n, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+#define get_vlc(s, vlc)            get_vlc_trace(s, (vlc)->table, (vlc)->bits, 3, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+#define get_vlc2(s, tab, bits, max) get_vlc_trace(s, tab, bits, max, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+#define tprintf(p, ...) av_log(p, AV_LOG_DEBUG, __VA_ARGS__)
+#else //TRACE
+#define tprintf(p, ...) {}
+#endif
+/**
+ * decodes a value in 0..2 from one or two bits:
+ * "0" -> 0, "10" -> 1, "11" -> 2.
+ */
+static inline int decode012(GetBitContext *gb){
+    if (!get_bits1(gb))
+        return 0;
+    return get_bits1(gb) + 1;
+}
+/**
+ * decodes a value in 0..2 from one or two bits:
+ * "1" -> 0, "01" -> 1, "00" -> 2.
+ */
+static inline int decode210(GetBitContext *gb){
+    const int first_bit_set = get_bits1(gb) != 0;
+    if (!first_bit_set)
+        return 2 - get_bits1(gb);
+    return 0;
+}
+/**
+ * returns size_in_bits minus the current read position; the result is
+ * negative once the reader has moved past the declared size.
+ */
+static inline int get_bits_left(GetBitContext *gb)
+{
+    return gb->size_in_bits - get_bits_count(gb);
+}
+#endif /* AVCODEC_GET_BITS_H */
diff --git a/apps/codecs/libwmapro/internal.h b/apps/codecs/libwmapro/internal.h
new file mode 100644
index 0000000000..b813480be9
--- /dev/null
+++ b/apps/codecs/libwmapro/internal.h
@@ -0,0 +1,51 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/**
+ * @file libavcodec/internal.h
+ * common internal api header.
+ */
+#ifndef AVCODEC_INTERNAL_H
+#define AVCODEC_INTERNAL_H
+#include <stdint.h>
+#include "avcodec.h"
+/**
+ * Determines whether pix_fmt is a hardware accelerated format.
+ */
+//int ff_is_hwaccel_pix_fmt(enum PixelFormat pix_fmt);
+/**
+ * Returns the hardware accelerated codec for codec codec_id and
+ * pixel format pix_fmt.
+ *
+ * @param codec_id the codec to match
+ * @param pix_fmt the pixel format to match
+ * @return the hardware accelerated codec, or NULL if none was found.
+ */
+//AVHWAccel *ff_find_hwaccel(enum CodecID codec_id, enum PixelFormat pix_fmt);
+/**
+ * Return the index into tab at which {a,b} match elements {[0],[1]} of tab.
+ * If there is no such matching pair then size is returned.
+ */
+int ff_match_2uint16(const uint16_t (*tab)[2], int size, int a, int b);
+#endif /* AVCODEC_INTERNAL_H */
diff --git a/apps/codecs/libwmapro/libavutil/attributes.h b/apps/codecs/libwmapro/libavutil/attributes.h
new file mode 100644
index 0000000000..1208bc0c72
--- /dev/null
+++ b/apps/codecs/libwmapro/libavutil/attributes.h
@@ -0,0 +1,113 @@
+/*
+ * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/**
+ * @file libavutil/attributes.h
+ * Macro definitions for various function/variable attributes
+ */
+#ifndef AVUTIL_ATTRIBUTES_H
+#define AVUTIL_ATTRIBUTES_H
+/* AV_GCC_VERSION_AT_LEAST(x,y): non-zero when compiling with a compiler that
+ * reports itself as GCC version x.y or newer, 0 for compilers that do not
+ * define __GNUC__. Arguments and the && term are parenthesized so that
+ * expression arguments group as written. Usable in #if. */
+#ifdef __GNUC__
+#    define AV_GCC_VERSION_AT_LEAST(x,y) (__GNUC__ > (x) || (__GNUC__ == (x) && __GNUC_MINOR__ >= (y)))
+#else
+#    define AV_GCC_VERSION_AT_LEAST(x,y) 0
+#endif
+/* av_always_inline: "__attribute__((always_inline)) inline" on GCC >= 3.1,
+ * plain "inline" otherwise. An earlier definition is kept. */
+#ifndef av_always_inline
+#if AV_GCC_VERSION_AT_LEAST(3,1)
+#    define av_always_inline __attribute__((always_inline)) inline
+#else
+#    define av_always_inline inline
+#endif
+#endif
+/* av_noinline: "__attribute__((noinline))" on GCC >= 3.1, empty otherwise. */
+#ifndef av_noinline
+#if AV_GCC_VERSION_AT_LEAST(3,1)
+#    define av_noinline __attribute__((noinline))
+#else
+#    define av_noinline
+#endif
+#endif
+/* av_pure: "__attribute__((pure))" on GCC >= 3.1, empty otherwise. */
+#ifndef av_pure
+#if AV_GCC_VERSION_AT_LEAST(3,1)
+#    define av_pure __attribute__((pure))
+#else
+#    define av_pure
+#endif
+#endif
+/* av_const: "__attribute__((const))" on GCC >= 2.6, empty otherwise. */
+#ifndef av_const
+#if AV_GCC_VERSION_AT_LEAST(2,6)
+#    define av_const __attribute__((const))
+#else
+#    define av_const
+#endif
+#endif
+/* av_cold: "__attribute__((cold))" on GCC >= 4.3, except when __ICC is
+ * defined with a value of 1110 or lower; empty otherwise. */
+#ifndef av_cold
+#if (!defined(__ICC) || __ICC > 1110) && AV_GCC_VERSION_AT_LEAST(4,3)
+#    define av_cold __attribute__((cold))
+#else
+#    define av_cold
+#endif
+#endif
+/* av_flatten: "__attribute__((flatten))" on GCC >= 4.1, except when __ICC is
+ * defined with a value of 1110 or lower; empty otherwise. */
+#ifndef av_flatten
+#if (!defined(__ICC) || __ICC > 1110) && AV_GCC_VERSION_AT_LEAST(4,1)
+#    define av_flatten __attribute__((flatten))
+#else
+#    define av_flatten
+#endif
+#endif
+/* attribute_deprecated: "__attribute__((deprecated))" on GCC >= 3.1, empty
+ * otherwise. */
+#ifndef attribute_deprecated
+#if AV_GCC_VERSION_AT_LEAST(3,1)
+#    define attribute_deprecated __attribute__((deprecated))
+#else
+#    define attribute_deprecated
+#endif
+#endif
+/* av_unused: "__attribute__((unused))" whenever __GNUC__ is defined, empty
+ * otherwise. */
+#ifndef av_unused
+#if defined(__GNUC__)
+#    define av_unused __attribute__((unused))
+#else
+#    define av_unused
+#endif
+#endif
+/* av_uninit(x): used in a declaration, expands to the self-initialization
+ * "x=x" on GCC (not ICC) and to plain "x" elsewhere.
+ * NOTE(review): presumably meant to silence "may be used uninitialized"
+ * warnings -- confirm. */
+#ifndef av_uninit
+#if defined(__GNUC__) && !defined(__ICC)
+#    define av_uninit(x) x=x
+#else
+#    define av_uninit(x) x
+#endif
+#endif
+/* av_builtin_constant_p: __builtin_constant_p when __GNUC__ is defined;
+ * otherwise a macro that always yields 0. */
+#ifdef __GNUC__
+#    define av_builtin_constant_p __builtin_constant_p
+#else
+#    define av_builtin_constant_p(x) 0
+#endif
+#endif /* AVUTIL_ATTRIBUTES_H */
diff --git a/apps/codecs/libwmapro/libavutil/avutil.h b/apps/codecs/libwmapro/libavutil/avutil.h
new file mode 100644
index 0000000000..c07e44d660
--- /dev/null
+++ b/apps/codecs/libwmapro/libavutil/avutil.h
@@ -0,0 +1,63 @@
+/*
+ * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef AVUTIL_AVUTIL_H
+#define AVUTIL_AVUTIL_H
+/**
+ * @file libavutil/avutil.h
+ * external API header
+ */
+/* AV_TOSTRING turns its argument into a string literal as written;
+ * AV_STRINGIFY macro-expands the argument first and then stringifies it. */
+#define AV_STRINGIFY(s)         AV_TOSTRING(s)
+#define AV_TOSTRING(s) #s
+/* Packs a version triple into one integer: a in bits 16 and up, b in bits
+ * 8..15, c in bits 0..7. Arguments are parenthesized so that expression
+ * arguments are shifted as a whole. */
+#define AV_VERSION_INT(a, b, c) ((a)<<16 | (b)<<8 | (c))
+/* AV_VERSION_DOT pastes a, '.', b, '.', c into a single a.b.c token;
+ * AV_VERSION macro-expands its arguments before pasting. */
+#define AV_VERSION_DOT(a, b, c) a ##.## b ##.## c
+#define AV_VERSION(a, b, c) AV_VERSION_DOT(a, b, c)
+/* libavutil version of this snapshot: major.minor.micro = 50.0.0 */
+#define LIBAVUTIL_VERSION_MAJOR 50
+#define LIBAVUTIL_VERSION_MINOR  0
+#define LIBAVUTIL_VERSION_MICRO  0
+/* the version packed into one integer, see AV_VERSION_INT */
+#define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
+                                               LIBAVUTIL_VERSION_MINOR, \
+                                               LIBAVUTIL_VERSION_MICRO)
+/* the version as a dotted token, see AV_VERSION */
+#define LIBAVUTIL_VERSION       AV_VERSION(LIBAVUTIL_VERSION_MAJOR,     \
+                                           LIBAVUTIL_VERSION_MINOR,     \
+                                           LIBAVUTIL_VERSION_MICRO)
+#define LIBAVUTIL_BUILD         LIBAVUTIL_VERSION_INT
+/* identification string: "Lavu" followed by the dotted version */
+#define LIBAVUTIL_IDENT         "Lavu" AV_STRINGIFY(LIBAVUTIL_VERSION)
+/**
+ * Returns the LIBAVUTIL_VERSION_INT constant.
+ */
+unsigned avutil_version(void);
+#include "common.h"
+//#include "mathematics.h"
+//#include "rational.h"
+//#include "intfloat_readwrite.h"
+#include "log.h"
+//#include "pixfmt.h"
+#endif /* AVUTIL_AVUTIL_H */
diff --git a/apps/codecs/libwmapro/libavutil/bswap.h b/apps/codecs/libwmapro/libavutil/bswap.h
new file mode 100644
index 0000000000..9175cb24a5
--- /dev/null
+++ b/apps/codecs/libwmapro/libavutil/bswap.h
@@ -0,0 +1,99 @@
+/*
+ * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/**
+ * @file libavutil/bswap.h
+ * byte swapping routines
+ */
+#ifndef AVUTIL_BSWAP_H
+#define AVUTIL_BSWAP_H
+#include <stdint.h>
+//#include "ffmpeg_config.h"
+#include "common.h"
+#if   ARCH_ARM
+#   include "arm/bswap.h"
+#elif ARCH_BFIN
+#   include "bfin/bswap.h"
+#elif ARCH_SH4
+#   include "sh4/bswap.h"
+#elif ARCH_X86
+#   include "x86/bswap.h"
+#endif
+#ifndef bswap_16
+/**
+ * swaps the two bytes of a 16-bit value.
+ * Only defined when no architecture header provided bswap_16.
+ */
+static av_always_inline av_const uint16_t bswap_16(uint16_t x)
+{
+    const unsigned int high_to_low = x >> 8;
+    const unsigned int low_to_high = x << 8;
+    /* the conversion to uint16_t on return drops the bits above bit 15 */
+    return high_to_low | low_to_high;
+}
+#endif
+#ifndef bswap_32
+/**
+ * reverses the byte order of a 32-bit value.
+ * Only defined when no architecture header provided bswap_32.
+ */
+static av_always_inline av_const uint32_t bswap_32(uint32_t x)
+{
+    return  (x >> 24)
+         | ((x >>  8) & 0x0000FF00)
+         | ((x <<  8) & 0x00FF0000)
+         |  (x << 24);
+}
+#endif
+#ifndef bswap_64
+/**
+ * reverses the byte order of a 64-bit value by byte-swapping each 32-bit
+ * half and exchanging the halves.
+ * Only defined when no architecture header provided bswap_64.
+ */
+static inline uint64_t av_const bswap_64(uint64_t x)
+{
+#if 0
+    x= ((x<< 8)&0xFF00FF00FF00FF00ULL) | ((x>> 8)&0x00FF00FF00FF00FFULL);
+    x= ((x<<16)&0xFFFF0000FFFF0000ULL) | ((x>>16)&0x0000FFFF0000FFFFULL);
+    return (x>>32) | (x<<32);
+#else
+    union {
+        uint64_t ll;
+        uint32_t l[2];
+    } in, out;
+    in.ll = x;
+    /* each half is byte-swapped and stored in the opposite slot */
+    out.l[1] = bswap_32 (in.l[0]);
+    out.l[0] = bswap_32 (in.l[1]);
+    return out.ll;
+#endif
+}
+#endif
+// be2me ... big-endian to machine-endian
+// le2me ... little-endian to machine-endian
+// With WORDS_BIGENDIAN defined the be2me_* macros pass the value through and
+// the le2me_* macros byte-swap; without it the roles are reversed.
+#ifdef WORDS_BIGENDIAN
+#define be2me_16(x) (x)
+#define be2me_32(x) (x)
+#define be2me_64(x) (x)
+#define le2me_16(x) bswap_16(x)
+#define le2me_32(x) bswap_32(x)
+#define le2me_64(x) bswap_64(x)
+#else
+#define be2me_16(x) bswap_16(x)
+#define be2me_32(x) bswap_32(x)
+#define be2me_64(x) bswap_64(x)
+#define le2me_16(x) (x)
+#define le2me_32(x) (x)
+#define le2me_64(x) (x)
+#endif
+#endif /* AVUTIL_BSWAP_H */
diff --git a/apps/codecs/libwmapro/libavutil/common.h b/apps/codecs/libwmapro/libavutil/common.h
new file mode 100644
index 0000000000..f3bc4ba44b
--- /dev/null
+++ b/apps/codecs/libwmapro/libavutil/common.h
@@ -0,0 +1,299 @@
+/*
+ * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/**
+ * @file libavutil/common.h
+ * common internal and external API header
+ */
+#ifndef AVUTIL_COMMON_H
+#define AVUTIL_COMMON_H
+#include <ctype.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "attributes.h"
+#include "internal.h"
+//rounded division & shift
+/* NOTE: all of the helpers below are function-like macros and most of them
+ * expand an argument more than once, so arguments with side effects (i++,
+ * function calls that must run once) should not be passed to them. */
+#define RSHIFT(a,b) ((a) > 0 ? ((a) + ((1<<(b))>>1))>>(b) : ((a) + ((1<<(b))>>1)-1)>>(b))
+/* assume b>0 */
+#define ROUNDED_DIV(a,b) (((a)>0 ? (a) + ((b)>>1) : (a) - ((b)>>1))/(b))
+#define FFABS(a) ((a) >= 0 ? (a) : (-(a)))
+/* Yields 1 for positive a and -1 otherwise; note that FFSIGN(0) is -1. */
+#define FFSIGN(a) ((a) > 0 ? 1 : -1)
+#define FFMAX(a,b) ((a) > (b) ? (a) : (b))
+#define FFMAX3(a,b,c) FFMAX(FFMAX(a,b),c)
+#define FFMIN(a,b) ((a) > (b) ? (b) : (a))
+#define FFMIN3(a,b,c) FFMIN(FFMIN(a,b),c)
+/* Exchanges a and b through a temporary of the given type. */
+#define FFSWAP(type,a,b) do{type SWAP_tmp= b; b= a; a= SWAP_tmp;}while(0)
+/* Element count of a; based on sizeof, so a must be an actual array, not a pointer. */
+#define FF_ARRAY_ELEMS(a) (sizeof(a) / sizeof((a)[0]))
+/* Rounds x up to a multiple of a; the mask arithmetic requires a to be a power of two. */
+#define FFALIGN(x, a) (((x)+(a)-1)&~((a)-1))
+/* misc math functions */
+extern const uint8_t ff_log2_tab[256];
+extern const uint8_t av_reverse[256];
+/**
+ * Portable table-driven log2. The value is narrowed to its most significant
+ * non-zero byte, the number of bits shifted out is counted, and the
+ * ff_log2_tab entry for the remaining byte is added to that count.
+ */
+static inline av_const int av_log2_c(unsigned int v)
+{
+    int shifted_out = 0;
+    if ((v & 0xffff0000) != 0) {
+        shifted_out = 16;
+        v >>= 16;
+    }
+    if ((v & 0xff00) != 0) {
+        shifted_out += 8;
+        v >>= 8;
+    }
+    return shifted_out + ff_log2_tab[v];
+}
+/**
+ * Variant of av_log2_c() that only inspects the low 16 bits' worth of range:
+ * at most one 8-bit narrowing step is done before the ff_log2_tab lookup, so
+ * v is expected to fit in 16 bits (larger values would index past byte range).
+ */
+static inline av_const int av_log2_16bit_c(unsigned int v)
+{
+    int shifted_out = 0;
+    if ((v & 0xff00) != 0) {
+        shifted_out = 8;
+        v >>= 8;
+    }
+    return shifted_out + ff_log2_tab[v];
+}
+#ifdef HAVE_AV_CONFIG_H
+#   include "config.h"
+#   include "intmath.h"
+#endif
+#ifndef av_log2
+#   define av_log2       av_log2_c
+#endif
+#ifndef av_log2_16bit
+#   define av_log2_16bit av_log2_16bit_c
+#endif
+/**
+ * Clips a signed integer value into the amin-amax range.
+ * @param a value to clip
+ * @param amin lower bound of the clip range
+ * @param amax upper bound of the clip range
+ * @return amin if a is below amin, otherwise amax if a is above amax,
+ *         otherwise a itself
+ */
+static inline av_const int av_clip(int a, int amin, int amax)
+{
+    if (a < amin)
+        return amin;
+    return a > amax ? amax : a;
+}
+/**
+ * Clips a signed integer value into the 0-255 range.
+ * @param a value to clip
+ * @return 0 if a is negative, 255 if a is greater than 255, otherwise a
+ */
+static inline av_const uint8_t av_clip_uint8(int a)
+{
+    /* Any bit set outside the low 8 means a is out of range. The bound is
+     * chosen with a plain comparison instead of (-a)>>31: negating INT_MIN
+     * overflows (in practice producing 255 rather than 0) and right-shifting
+     * a negative int is implementation-defined. */
+    if (a&(~255)) return a < 0 ? 0 : 255;
+    else          return a;
+}
+/**
+ * Clips a signed integer value into the 0-65535 range.
+ * @param a value to clip
+ * @return 0 if a is negative, 65535 if a is greater than 65535, otherwise a
+ */
+static inline av_const uint16_t av_clip_uint16(int a)
+{
+    /* Any bit set outside the low 16 means a is out of range. A comparison
+     * picks the bound instead of (-a)>>31, which overflows for INT_MIN (in
+     * practice producing 65535 rather than 0) and depends on an
+     * implementation-defined shift of a negative value. */
+    if (a&(~65535)) return a < 0 ? 0 : 65535;
+    else            return a;
+}
+/**
+ * Clips a signed integer value into the -32768,32767 range.
+ * @param a value to clip
+ * @return -32768 if a is below -32768, 32767 if a is above 32767, otherwise a
+ */
+static inline av_const int16_t av_clip_int16(int a)
+{
+    /* Direct comparisons avoid the signed overflow of a+32768 for a close to
+     * INT_MAX and the implementation-defined a>>31 on negative values. */
+    if (a < -32768) return -32768;
+    if (a >  32767) return  32767;
+    return a;
+}
+/**
+ * Clips a float value into the amin-amax range.
+ * @param a value to clip
+ * @param amin lower bound of the clip range
+ * @param amax upper bound of the clip range
+ * @return amin if a compares below amin, otherwise amax if a compares above
+ *         amax, otherwise a itself (so a value for which both comparisons
+ *         are false, such as NaN, is returned unchanged)
+ */
+static inline av_const float av_clipf(float a, float amin, float amax)
+{
+    if (a < amin)
+        return amin;
+    return a > amax ? amax : a;
+}
+/** Computes ceil(log2(x)) as av_log2() of twice (x - 1).
+ * @param x value used to compute ceil(log2(x))
+ * @return computed ceiling of log2(x)
+ */
+static inline av_const int av_ceil_log2(int x)
+{
+    const int doubled_pred = (x - 1) << 1;
+    return av_log2(doubled_pred);
+}
+/* Pack four byte values into one 32-bit tag: MKTAG places a in the least
+ * significant byte, MKBETAG places a in the most significant byte.
+ * Every argument is parenthesized so that expression arguments expand
+ * correctly, and the byte shifted by 24 is converted to unsigned first so
+ * that a value >= 0x80 is not shifted into the sign bit of an int. */
+#define MKTAG(a,b,c,d) ((a) | ((b) << 8) | ((c) << 16) | ((unsigned)(d) << 24))
+#define MKBETAG(a,b,c,d) ((d) | ((c) << 8) | ((b) << 16) | ((unsigned)(a) << 24))
+/*!
+ * \def GET_UTF8(val, GET_BYTE, ERROR)
+ * Converts a UTF-8 character (up to 4 bytes long) to its 32-bit UCS-4 encoded form
+ * \param val is the output and should be of type uint32_t. It holds the converted
+ * UCS-4 character and should be a left value. It is expanded several times,
+ * so it must not have side effects.
+ * \param GET_BYTE gets UTF-8 encoded bytes from any proper source. It can be
+ * a function or a statement whose return value or evaluated value is of type
+ * uint8_t. It will be executed up to 4 times for values in the valid UTF-8 range,
+ * and up to 7 times in the general case.
+ * \param ERROR action that should be taken when an invalid UTF-8 byte is returned
+ * from GET_BYTE. It should be a statement that jumps out of the macro,
+ * like exit(), goto, return, break, or continue. It is expanded without a
+ * trailing semicolon, so it must supply its own.
+ *
+ * The expansion is an assignment followed by a brace block (not a single
+ * do/while(0) statement), so it must not be used as the body of an unbraced
+ * if/else or loop.
+ */
+#define GET_UTF8(val, GET_BYTE, ERROR)\
+    val= GET_BYTE;\
+    {\
+        int ones= 7 - av_log2(val ^ 255);\
+        if(ones==1)\
+            ERROR\
+        val&= 127>>ones;\
+        while(--ones > 0){\
+            int tmp= GET_BYTE - 128;\
+            if(tmp>>6)\
+                ERROR\
+            val= (val<<6) + tmp;\
+        }\
+    }
+/*!
+ * \def GET_UTF16(val, GET_16BIT, ERROR)
+ * Converts a UTF-16 character (2 or 4 bytes) to its 32-bit UCS-4 encoded form
+ * \param val is the output and should be of type uint32_t. It holds the converted
+ * UCS-4 character and should be a left value.
+ * \param GET_16BIT gets two bytes of UTF-16 encoded data converted to native endianness.
+ * It can be a function or a statement whose return value or evaluated value is of type
+ * uint16_t. It will be executed up to 2 times.
+ * \param ERROR action that should be taken when an invalid UTF-16 surrogate is
+ * returned from GET_16BIT. It should be a statement that jumps out of the macro,
+ * like exit(), goto, return, break, or continue. It is expanded without a
+ * trailing semicolon, so it must supply its own.
+ *
+ * The last line of the definition deliberately carries no line continuation,
+ * so whatever follows the macro in the file is never spliced into it.
+ */
+#define GET_UTF16(val, GET_16BIT, ERROR)\
+    val = GET_16BIT;\
+    {\
+        unsigned int hi = val - 0xD800;\
+        if (hi < 0x800) {\
+            val = GET_16BIT - 0xDC00;\
+            if (val > 0x3FFU || hi > 0x3FFU)\
+                ERROR\
+            val += (hi<<10) + 0x10000;\
+        }\
+    }
+/*!
+ * \def PUT_UTF8(val, tmp, PUT_BYTE)
+ * Converts a 32-bit Unicode character to its UTF-8 encoded form (up to 4 bytes long).
+ * \param val is an input-only argument and should be of type uint32_t. It holds
+ * a UCS-4 encoded Unicode character that is to be converted to UTF-8. If
+ * val is given as a function it is executed only once.
+ * \param tmp is a temporary variable and should be of type uint8_t. It
+ * represents an intermediate value during conversion that is to be
+ * output by PUT_BYTE.
+ * \param PUT_BYTE writes the converted UTF-8 bytes to any proper destination.
+ * It could be a function or a statement, and uses tmp as the input byte.
+ * For example, PUT_BYTE could be "*output++ = tmp;" PUT_BYTE will be
+ * executed up to 4 times for values in the valid UTF-8 range and up to
+ * 7 times in the general case, depending on the length of the converted
+ * Unicode character. It is expanded without a trailing semicolon, so it
+ * must supply its own (as in the example above).
+ *
+ * The expansion declares locals named bytes, shift and in inside its own
+ * brace block; val and PUT_BYTE should not refer to caller variables with
+ * those names, since the macro's locals would shadow them.
+ */
+#define PUT_UTF8(val, tmp, PUT_BYTE)\
+    {\
+        int bytes, shift;\
+        uint32_t in = val;\
+        if (in < 0x80) {\
+            tmp = in;\
+            PUT_BYTE\
+        } else {\
+            bytes = (av_log2(in) + 4) / 5;\
+            shift = (bytes - 1) * 6;\
+            tmp = (256 - (256 >> bytes)) | (in >> shift);\
+            PUT_BYTE\
+            while (shift >= 6) {\
+                shift -= 6;\
+                tmp = 0x80 | ((in >> shift) & 0x3f);\
+                PUT_BYTE\
+            }\
+        }\
+    }
+/*!
+ * \def PUT_UTF16(val, tmp, PUT_16BIT)
+ * Converts a 32-bit Unicode character to its UTF-16 encoded form (2 or 4 bytes).
+ * \param val is an input-only argument and should be of type uint32_t. It holds
+ * a UCS-4 encoded Unicode character that is to be converted to UTF-16. If
+ * val is given as a function it is executed only once.
+ * \param tmp is a temporary variable and should be of type uint16_t. It
+ * represents an intermediate value during conversion that is to be
+ * output by PUT_16BIT.
+ * \param PUT_16BIT writes the converted UTF-16 data to any proper destination
+ * in desired endianness. It could be a function or a statement, and uses tmp
+ * as the input value.  For example, PUT_16BIT could be "*output++ = tmp;"
+ * PUT_16BIT will be executed 1 or 2 times depending on input character.
+ * It is expanded without a trailing semicolon, so it must supply its own.
+ *
+ * The last line of the definition deliberately carries no line continuation,
+ * so the directive that follows the macro in the file is never spliced into it.
+ */
+#define PUT_UTF16(val, tmp, PUT_16BIT)\
+    {\
+        uint32_t in = val;\
+        if (in < 0x10000) {\
+            tmp = in;\
+            PUT_16BIT\
+        } else {\
+            tmp = 0xD800 | ((in - 0x10000) >> 10);\
+            PUT_16BIT\
+            tmp = 0xDC00 | ((in - 0x10000) & 0x3FF);\
+            PUT_16BIT\
+        }\
+    }
+#include "mem.h"
+#if 0
+#ifdef HAVE_AV_CONFIG_H
+#    include "internal.h"
+#endif /* HAVE_AV_CONFIG_H */
+#endif
+#endif /* AVUTIL_COMMON_H */
diff --git a/apps/codecs/libwmapro/libavutil/internal.h b/apps/codecs/libwmapro/libavutil/internal.h
new file mode 100644
index 0000000000..2b8da1d8c2
--- /dev/null
+++ b/apps/codecs/libwmapro/libavutil/internal.h
@@ -0,0 +1,207 @@
+/*
+ * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/**
+ * @file libavutil/internal.h
+ * common internal API header
+ */
+#ifndef AVUTIL_INTERNAL_H
+#define AVUTIL_INTERNAL_H
+#if !defined(DEBUG) && !defined(NDEBUG)
+#    define NDEBUG
+#endif
+#include <limits.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <assert.h>
+//#include "config.h"
+#include "attributes.h"
+//#include "timer.h"
+#ifndef attribute_align_arg
+#if (!defined(__ICC) || __ICC > 1110) && AV_GCC_VERSION_AT_LEAST(4,2)
+#    define attribute_align_arg __attribute__((force_align_arg_pointer))
+#else
+#    define attribute_align_arg
+#endif
+#endif
+#ifndef attribute_used
+#if AV_GCC_VERSION_AT_LEAST(3,1)
+#    define attribute_used __attribute__((used))
+#else
+#    define attribute_used
+#endif
+#endif
+#ifndef av_alias
+#if HAVE_ATTRIBUTE_MAY_ALIAS && (!defined(__ICC) || __ICC > 1110) && AV_GCC_VERSION_AT_LEAST(3,3)
+#   define av_alias __attribute__((may_alias))
+#else
+#   define av_alias
+#endif
+#endif
+#ifndef INT16_MIN
+#define INT16_MIN       (-0x7fff - 1)
+#endif
+#ifndef INT16_MAX
+#define INT16_MAX       0x7fff
+#endif
+#ifndef INT32_MIN
+#define INT32_MIN       (-0x7fffffff - 1)
+#endif
+#ifndef INT32_MAX
+#define INT32_MAX       0x7fffffff
+#endif
+#ifndef UINT32_MAX
+#define UINT32_MAX      0xffffffff
+#endif
+#ifndef INT64_MIN
+#define INT64_MIN       (-0x7fffffffffffffffLL - 1)
+#endif
+#ifndef INT64_MAX
+#define INT64_MAX INT64_C(9223372036854775807)
+#endif
+#ifndef UINT64_MAX
+#define UINT64_MAX UINT64_C(0xFFFFFFFFFFFFFFFF)
+#endif
+#ifndef INT_BIT
+#    define INT_BIT (CHAR_BIT * sizeof(int))
+#endif
+#ifndef offsetof
+#    define offsetof(T, F) ((unsigned int)((char *)&((T *)0)->F))
+#endif
+/* Use to export labels from asm. */
+#define LABEL_MANGLE(a) EXTERN_PREFIX #a
+// Use rip-relative addressing if compiling PIC code on x86-64.
+#if ARCH_X86_64 && defined(PIC)
+#    define LOCAL_MANGLE(a) #a "(%%rip)"
+#else
+#    define LOCAL_MANGLE(a) #a
+#endif
+#define MANGLE(a) EXTERN_PREFIX LOCAL_MANGLE(a)
+/* debug stuff */
+/* dprintf macros */
+/* With DEBUG defined, dprintf() forwards its arguments to av_log() at
+ * AV_LOG_DEBUG; otherwise it expands to nothing, so its arguments are not
+ * evaluated at all. */
+#ifdef DEBUG
+#    define dprintf(pctx, ...) av_log(pctx, AV_LOG_DEBUG, __VA_ARGS__)
+#else
+#    define dprintf(pctx, ...)
+#endif
+/* Logs "Abort at <file>:<line>" at AV_LOG_ERROR with a NULL context and then
+ * calls abort(). */
+#define av_abort()      do { av_log(NULL, AV_LOG_ERROR, "Abort at %s:%d\n", __FILE__, __LINE__); abort(); } while (0)
+/* math */
+/* MASK_ABS(mask, level): overwrites mask with level >> 31 and level with
+ * (level ^ mask) - mask. With a 32-bit int and an arithmetic right shift
+ * this leaves the sign mask (0 or -1) in mask and the absolute value in
+ * level -- NOTE(review): both properties are assumed, confirm for new targets.
+ * Both arguments must be plain lvalues: they are assigned to, and the generic
+ * variant expands them unparenthesized as two separate statements. */
+#if ARCH_X86
+/* x86 variant: level is held in eax and mask in edx (see the constraints);
+ * cltd fills edx from the sign of eax, then xor/sub perform the same update. */
+#define MASK_ABS(mask, level)\
+            __asm__ volatile(\
+                "cltd                   \n\t"\
+                "xorl %1, %0            \n\t"\
+                "subl %1, %0            \n\t"\
+                : "+a" (level), "=&d" (mask)\
+            );
+#else
+#define MASK_ABS(mask, level)\
+            mask  = level >> 31;\
+            level = (level ^ mask) - mask;
+#endif
+/* avoid usage of dangerous/inappropriate system functions */
+#undef  malloc
+#define malloc please_use_av_malloc
+#undef  free
+#define free please_use_av_free
+#undef  realloc
+#define realloc please_use_av_realloc
+#undef  time
+#define time time_is_forbidden_due_to_security_issues
+#undef  rand
+#define rand rand_is_forbidden_due_to_state_trashing_use_av_lfg_get
+#undef  srand
+#define srand srand_is_forbidden_due_to_state_trashing_use_av_lfg_init
+#undef  random
+#define random random_is_forbidden_due_to_state_trashing_use_av_lfg_get
+#undef  sprintf
+#define sprintf sprintf_is_forbidden_due_to_security_issues_use_snprintf
+#undef  strcat
+#define strcat strcat_is_forbidden_due_to_security_issues_use_av_strlcat
+#undef  exit
+#define exit exit_is_forbidden
+#ifndef LIBAVFORMAT_BUILD
+#undef  printf
+#define printf please_use_av_log_instead_of_printf
+#undef  fprintf
+#define fprintf please_use_av_log_instead_of_fprintf
+#undef  puts
+#define puts please_use_av_log_instead_of_puts
+#undef  perror
+#define perror please_use_av_log_instead_of_perror
+#endif
+/* FF_ALLOC_OR_GOTO(ctx, p, size, label): assigns av_malloc(size) to p; if the
+ * result is NULL although size is non-zero, logs "Cannot allocate memory." on
+ * ctx at AV_LOG_ERROR and jumps to label.
+ * The expansion is a bare brace block rather than do/while(0), and both p and
+ * size are expanded more than once, so pass side-effect-free expressions and
+ * avoid using it as the body of an unbraced if/else. */
+#define FF_ALLOC_OR_GOTO(ctx, p, size, label)\
+{\
+    p = av_malloc(size);\
+    if (p == NULL && (size) != 0) {\
+        av_log(ctx, AV_LOG_ERROR, "Cannot allocate memory.\n");\
+        goto label;\
+    }\
+}
+/* Same as FF_ALLOC_OR_GOTO, but the memory is obtained with av_mallocz(). */
+#define FF_ALLOCZ_OR_GOTO(ctx, p, size, label)\
+{\
+    p = av_mallocz(size);\
+    if (p == NULL && (size) != 0) {\
+        av_log(ctx, AV_LOG_ERROR, "Cannot allocate memory.\n");\
+        goto label;\
+    }\
+}
+//#include "libm.h"
+/**
+ * Returns NULL if CONFIG_SMALL is true, otherwise the argument
+ * without modification. Used to disable the definition of strings
+ * (for example AVCodec long_names).
+ */
+#if CONFIG_SMALL
+#   define NULL_IF_CONFIG_SMALL(x) NULL
+#else
+#   define NULL_IF_CONFIG_SMALL(x) x
+#endif
+#endif /* AVUTIL_INTERNAL_H */
diff --git a/apps/codecs/libwmapro/libavutil/intreadwrite.h b/apps/codecs/libwmapro/libavutil/intreadwrite.h
new file mode 100644
index 0000000000..d27a50061e
--- /dev/null
+++ b/apps/codecs/libwmapro/libavutil/intreadwrite.h
@@ -0,0 +1,192 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef AVUTIL_INTREADWRITE_H
+#define AVUTIL_INTREADWRITE_H
+#include <stdint.h>
+//#include "ffmpeg_config.h"
+#include "bswap.h"
+/* AV_RNxx(a) reads and AV_WNxx(a, b) writes an xx-bit value in native byte
+ * order at address a. Three implementations are selected by compiler:
+ *  - GCC-compatible: access through a packed single-member struct;
+ *  - DEC C: access through an __unaligned-qualified pointer;
+ *  - anything else: a plain pointer cast and dereference.
+ * NOTE(review): the first two are presumably what makes misaligned addresses
+ * safe; the plain-cast fallback offers no such protection -- confirm before
+ * relying on it with unaligned pointers. */
+#ifdef __GNUC__
+struct unaligned_64 { uint64_t l; } __attribute__((packed));
+struct unaligned_32 { uint32_t l; } __attribute__((packed));
+struct unaligned_16 { uint16_t l; } __attribute__((packed));
+#define AV_RN16(a) (((const struct unaligned_16 *) (a))->l)
+#define AV_RN32(a) (((const struct unaligned_32 *) (a))->l)
+#define AV_RN64(a) (((const struct unaligned_64 *) (a))->l)
+#define AV_WN16(a, b) (((struct unaligned_16 *) (a))->l) = (b)
+#define AV_WN32(a, b) (((struct unaligned_32 *) (a))->l) = (b)
+#define AV_WN64(a, b) (((struct unaligned_64 *) (a))->l) = (b)
+#elif defined(__DECC)
+#define AV_RN16(a) (*((const __unaligned uint16_t*)(a)))
+#define AV_RN32(a) (*((const __unaligned uint32_t*)(a)))
+#define AV_RN64(a) (*((const __unaligned uint64_t*)(a)))
+#define AV_WN16(a, b) *((__unaligned uint16_t*)(a)) = (b)
+#define AV_WN32(a, b) *((__unaligned uint32_t*)(a)) = (b)
+#define AV_WN64(a, b) *((__unaligned uint64_t*)(a)) = (b)
+#else
+#define AV_RN16(a) (*((const uint16_t*)(a)))
+#define AV_RN32(a) (*((const uint32_t*)(a)))
+#define AV_RN64(a) (*((const uint64_t*)(a)))
+#define AV_WN16(a, b) *((uint16_t*)(a)) = (b)
+#define AV_WN32(a, b) *((uint32_t*)(a)) = (b)
+#define AV_WN64(a, b) *((uint64_t*)(a)) = (b)
+#endif /* !__GNUC__ */
+/* endian macros */
+#define AV_RB8(x)     (((const uint8_t*)(x))[0])
+#define AV_WB8(p, d)  do { ((uint8_t*)(p))[0] = (d); } while(0)
+#define AV_RL8(x)     AV_RB8(x)
+#define AV_WL8(p, d)  AV_WB8(p, d)
+#if HAVE_FAST_UNALIGNED
+# ifdef WORDS_BIGENDIAN
+#  define AV_RB16(x)    AV_RN16(x)
+#  define AV_WB16(p, d) AV_WN16(p, d)
+#  define AV_RL16(x)    bswap_16(AV_RN16(x))
+#  define AV_WL16(p, d) AV_WN16(p, bswap_16(d))
+#  define AV_RB32(x)    AV_RN32(x)
+#  define AV_WB32(p, d) AV_WN32(p, d)
+#  define AV_RL32(x)    bswap_32(AV_RN32(x))
+#  define AV_WL32(p, d) AV_WN32(p, bswap_32(d))
+#  define AV_RB64(x)    AV_RN64(x)
+#  define AV_WB64(p, d) AV_WN64(p, d)
+#  define AV_RL64(x)    bswap_64(AV_RN64(x))
+#  define AV_WL64(p, d) AV_WN64(p, bswap_64(d))
+# else /* WORDS_BIGENDIAN */
+#  define AV_RB16(x)    bswap_16(AV_RN16(x))
+#  define AV_WB16(p, d) AV_WN16(p, bswap_16(d))
+#  define AV_RL16(x)    AV_RN16(x)
+#  define AV_WL16(p, d) AV_WN16(p, d)
+#  define AV_RB32(x)    bswap_32(AV_RN32(x))
+#  define AV_WB32(p, d) AV_WN32(p, bswap_32(d))
+#  define AV_RL32(x)    AV_RN32(x)
+#  define AV_WL32(p, d) AV_WN32(p, d)
+#  define AV_RB64(x)    bswap_64(AV_RN64(x))
+#  define AV_WB64(p, d) AV_WN64(p, bswap_64(d))
+#  define AV_RL64(x)    AV_RN64(x)
+#  define AV_WL64(p, d) AV_WN64(p, d)
+# endif
+#else /* HAVE_FAST_UNALIGNED */
+#define AV_RB16(x)  ((((const uint8_t*)(x))[0] << 8) | ((const uint8_t*)(x))[1])
+#define AV_WB16(p, d) do { \
+                    ((uint8_t*)(p))[1] = (d); \
+                    ((uint8_t*)(p))[0] = (d)>>8; } while(0)
+#define AV_RL16(x)  ((((const uint8_t*)(x))[1] << 8) | \
+                      ((const uint8_t*)(x))[0])
+#define AV_WL16(p, d) do { \
+                    ((uint8_t*)(p))[0] = (d); \
+                    ((uint8_t*)(p))[1] = (d)>>8; } while(0)
+/* Reads a big-endian 32-bit value byte by byte. Each byte is widened to
+ * uint32_t before shifting so that a first byte >= 0x80 is not shifted into
+ * the sign bit of an int; the result is unsigned, matching the
+ * bswap_32()-based definition used when HAVE_FAST_UNALIGNED is set. */
+#define AV_RB32(x)  (((uint32_t)((const uint8_t*)(x))[0] << 24) | \
+                     ((uint32_t)((const uint8_t*)(x))[1] << 16) | \
+                     ((uint32_t)((const uint8_t*)(x))[2] <<  8) | \
+                      (uint32_t)((const uint8_t*)(x))[3])
+#define AV_WB32(p, d) do { \
+                    ((uint8_t*)(p))[3] = (d); \
+                    ((uint8_t*)(p))[2] = (d)>>8; \
+                    ((uint8_t*)(p))[1] = (d)>>16; \
+                    ((uint8_t*)(p))[0] = (d)>>24; } while(0)
+/* Reads a little-endian 32-bit value byte by byte. Each byte is widened to
+ * uint32_t before shifting so that a last byte >= 0x80 is not shifted into
+ * the sign bit of an int; the result is unsigned, matching the
+ * AV_RN32()-based definition used when HAVE_FAST_UNALIGNED is set. */
+#define AV_RL32(x) (((uint32_t)((const uint8_t*)(x))[3] << 24) | \
+                    ((uint32_t)((const uint8_t*)(x))[2] << 16) | \
+                    ((uint32_t)((const uint8_t*)(x))[1] <<  8) | \
+                     (uint32_t)((const uint8_t*)(x))[0])
+#define AV_WL32(p, d) do { \
+                    ((uint8_t*)(p))[0] = (d); \
+                    ((uint8_t*)(p))[1] = (d)>>8; \
+                    ((uint8_t*)(p))[2] = (d)>>16; \
+                    ((uint8_t*)(p))[3] = (d)>>24; } while(0)
+#define AV_RB64(x)  (((uint64_t)((const uint8_t*)(x))[0] << 56) | \
+                     ((uint64_t)((const uint8_t*)(x))[1] << 48) | \
+                     ((uint64_t)((const uint8_t*)(x))[2] << 40) | \
+                     ((uint64_t)((const uint8_t*)(x))[3] << 32) | \
+                     ((uint64_t)((const uint8_t*)(x))[4] << 24) | \
+                     ((uint64_t)((const uint8_t*)(x))[5] << 16) | \
+                     ((uint64_t)((const uint8_t*)(x))[6] <<  8) | \
+                      (uint64_t)((const uint8_t*)(x))[7])
+#define AV_WB64(p, d) do { \
+                    ((uint8_t*)(p))[7] = (d);     \
+                    ((uint8_t*)(p))[6] = (d)>>8;  \
+                    ((uint8_t*)(p))[5] = (d)>>16; \
+                    ((uint8_t*)(p))[4] = (d)>>24; \
+                    ((uint8_t*)(p))[3] = (d)>>32; \
+                    ((uint8_t*)(p))[2] = (d)>>40; \
+                    ((uint8_t*)(p))[1] = (d)>>48; \
+                    ((uint8_t*)(p))[0] = (d)>>56; } while(0)
+#define AV_RL64(x)  (((uint64_t)((const uint8_t*)(x))[7] << 56) | \
+                     ((uint64_t)((const uint8_t*)(x))[6] << 48) | \
+                     ((uint64_t)((const uint8_t*)(x))[5] << 40) | \
+                     ((uint64_t)((const uint8_t*)(x))[4] << 32) | \
+                     ((uint64_t)((const uint8_t*)(x))[3] << 24) | \
+                     ((uint64_t)((const uint8_t*)(x))[2] << 16) | \
+                     ((uint64_t)((const uint8_t*)(x))[1] <<  8) | \
+                      (uint64_t)((const uint8_t*)(x))[0])
+#define AV_WL64(p, d) do { \
+                    ((uint8_t*)(p))[0] = (d);     \
+                    ((uint8_t*)(p))[1] = (d)>>8;  \
+                    ((uint8_t*)(p))[2] = (d)>>16; \
+                    ((uint8_t*)(p))[3] = (d)>>24; \
+                    ((uint8_t*)(p))[4] = (d)>>32; \
+                    ((uint8_t*)(p))[5] = (d)>>40; \
+                    ((uint8_t*)(p))[6] = (d)>>48; \
+                    ((uint8_t*)(p))[7] = (d)>>56; } while(0)
+#endif  /* HAVE_FAST_UNALIGNED */
+#define AV_RB24(x)  ((((const uint8_t*)(x))[0] << 16) | \
+                     (((const uint8_t*)(x))[1] <<  8) | \
+                      ((const uint8_t*)(x))[2])
+#define AV_WB24(p, d) do { \
+                    ((uint8_t*)(p))[2] = (d); \
+                    ((uint8_t*)(p))[1] = (d)>>8; \
+                    ((uint8_t*)(p))[0] = (d)>>16; } while(0)
+#define AV_RL24(x)  ((((const uint8_t*)(x))[2] << 16) | \
+                     (((const uint8_t*)(x))[1] <<  8) | \
+                      ((const uint8_t*)(x))[0])
+#define AV_WL24(p, d) do { \
+                    ((uint8_t*)(p))[0] = (d); \
+                    ((uint8_t*)(p))[1] = (d)>>8; \
+                    ((uint8_t*)(p))[2] = (d)>>16; } while(0)
+#endif /* AVUTIL_INTREADWRITE_H */
diff --git a/apps/codecs/libwmapro/libavutil/log.c b/apps/codecs/libwmapro/libavutil/log.c
new file mode 100644
index 0000000000..4bb9652c2c
--- /dev/null
+++ b/apps/codecs/libwmapro/libavutil/log.c
@@ -0,0 +1,89 @@
+/*
+ * log functions
+ * Copyright (c) 2003 Michel Bardiaux
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/**
+ * @file libavutil/log.c
+ * logging functions
+ */
+#include "avutil.h"
+#include "log.h"
+int av_log_level = AV_LOG_INFO;
+/**
+ * Default log callback: formats the message and writes it to stderr.
+ *
+ * Messages whose level is greater than av_log_level are dropped. When the
+ * previous message ended in a newline and ptr is non-NULL, the text is
+ * prefixed with "[<item name> @ <ptr>]" taken from the AVClass that ptr
+ * points to. A message that ends in a newline and is identical to the
+ * previous one is not printed but counted; the count is reported as
+ * "Last message repeated N times" before the next differing message.
+ *
+ * All state lives in function-level static buffers, so the function keeps
+ * history between calls and is not reentrant.
+ *
+ * @param ptr   NULL, or a pointer to a struct whose first field is an AVClass*
+ * @param level importance of the message (lower is more important)
+ * @param fmt   printf-style format string
+ * @param vl    arguments for fmt
+ */
+void av_log_default_callback(void* ptr, int level, const char* fmt, va_list vl)
+{
+    static int print_prefix=1;
+    static int count;
+    static char line[1024], prev[1024];
+    AVClass* avc= ptr ? *(AVClass**)ptr : NULL;
+    size_t len;
+    if(level>av_log_level)
+        return;
+#undef fprintf
+    if(print_prefix && avc) {
+        snprintf(line, sizeof(line), "[%s @ %p]", avc->item_name(ptr), ptr);
+    }else
+        line[0]=0;
+    vsnprintf(line + strlen(line), sizeof(line) - strlen(line), fmt, vl);
+    /* An empty message must not index line[-1]; it does not end a line, so
+     * the next message gets no prefix. */
+    len = strlen(line);
+    print_prefix= len && line[len-1] == '\n';
+    if(print_prefix && !strcmp(line, prev)){
+        count++;
+        return;
+    }
+    if(count>0){
+        fprintf(stderr, "    Last message repeated %d times\n", count);
+        count=0;
+    }
+    fputs(line, stderr);
+    strcpy(prev, line);
+}
+static void (*av_log_callback)(void*, int, const char*, va_list) = av_log_default_callback;
+/**
+ * Variadic front end of the logging API: collects the arguments that follow
+ * fmt into a va_list and forwards them, together with avcl and level, to
+ * av_vlog().
+ */
+void av_log(void* avcl, int level, const char *fmt, ...)
+{
+    va_list vl;
+    va_start(vl, fmt);
+    av_vlog(avcl, level, fmt, vl);
+    va_end(vl);
+}
+/**
+ * Passes the message unchanged to the currently installed log callback
+ * (av_log_callback). No level filtering is done here; that is left to the
+ * callback.
+ */
+void av_vlog(void* avcl, int level, const char *fmt, va_list vl)
+{
+    av_log_callback(avcl, level, fmt, vl);
+}
+/** Returns the current value of the global av_log_level. */
+int av_log_get_level(void)
+{
+    return av_log_level;
+}
+/** Stores level in the global av_log_level; the value is not range-checked. */
+void av_log_set_level(int level)
+{
+    av_log_level = level;
+}
+/**
+ * Replaces the callback that av_vlog() invokes. The pointer is stored as
+ * given, without a NULL check, so passing NULL makes the next av_vlog() call
+ * through a null function pointer.
+ */
+void av_log_set_callback(void (*callback)(void*, int, const char*, va_list))
+{
+    av_log_callback = callback;
+}
diff --git a/apps/codecs/libwmapro/libavutil/log.h b/apps/codecs/libwmapro/libavutil/log.h
new file mode 100644
index 0000000000..1206a2fc38
--- /dev/null
+++ b/apps/codecs/libwmapro/libavutil/log.h
@@ -0,0 +1,116 @@
+/*
+ * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef AVUTIL_LOG_H
+#define AVUTIL_LOG_H
+#include <stdarg.h>
+#include "avutil.h"
+/**
+ * Describes the class of an AVClass context structure. That is an
+ * arbitrary struct of which the first field is a pointer to an
+ * AVClass struct (e.g. AVCodecContext, AVFormatContext etc.).
+ */
+typedef struct AVCLASS AVClass;
+struct AVCLASS {
+    /**
+     * The name of the class; usually it is the same name as the
+     * context structure type to which the AVClass is associated.
+     */
+    const char* class_name;
+    /**
+     * A pointer to a function which returns the name of a context
+     * instance \p ctx associated with the class.
+     */
+    const char* (*item_name)(void* ctx);
+    /**
+     * a pointer to the first option specified in the class if any or NULL
+     *
+     * @see av_set_default_options()
+     */
+    const struct AVOption *option;
+};
+/* av_log API */
+/**
+ * Lower than every other level defined here. A callback that discards
+ * messages with level > av_log_level therefore prints nothing at
+ * AV_LOG_PANIC or above when av_log_level is set to this value.
+ */
+#define AV_LOG_QUIET    -8
+/**
+ * Something went really wrong and we will crash now.
+ */
+#define AV_LOG_PANIC     0
+/**
+ * Something went wrong and recovery is not possible.
+ * For example, no header was found for a format which depends
+ * on headers or an illegal combination of parameters is used.
+ */
+#define AV_LOG_FATAL     8
+/**
+ * Something went wrong and cannot losslessly be recovered.
+ * However, not all future data is affected.
+ */
+#define AV_LOG_ERROR    16
+/**
+ * Something somehow does not look correct. This may or may not
+ * lead to problems. An example would be the use of '-vstrict -2'.
+ */
+#define AV_LOG_WARNING  24
+/**
+ * NOTE(review): presumably the level for ordinary informational messages --
+ * confirm against callers.
+ */
+#define AV_LOG_INFO     32
+/**
+ * NOTE(review): presumably more detailed information than AV_LOG_INFO --
+ * confirm against callers.
+ */
+#define AV_LOG_VERBOSE  40
+/**
+ * Stuff which is only useful for libav* developers.
+ */
+#define AV_LOG_DEBUG    48
+/**
+ * Sends the specified message to the log if the level is less than or equal
+ * to the current av_log_level. By default, all logging messages are sent to
+ * stderr. This behavior can be altered by setting a different av_vlog callback
+ * function.
+ *
+ * @param avcl A pointer to an arbitrary struct of which the first field is a
+ * pointer to an AVClass struct.
+ * @param level The importance level of the message, lower values signifying
+ * higher importance.
+ * @param fmt The format string (printf-compatible) that specifies how
+ * subsequent arguments are converted to output.
+ * @see av_vlog
+ */
+#ifdef __GNUC__
+void av_log(void*, int level, const char *fmt, ...) __attribute__ ((__format__ (__printf__, 3, 4)));
+#else
+void av_log(void*, int level, const char *fmt, ...);
+#endif
+void av_vlog(void*, int level, const char *fmt, va_list);
+int av_log_get_level(void);
+void av_log_set_level(int);
+void av_log_set_callback(void (*)(void*, int, const char*, va_list));
+void av_log_default_callback(void* ptr, int level, const char* fmt, va_list vl);
+#endif /* AVUTIL_LOG_H */
diff --git a/apps/codecs/libwmapro/libavutil/mathematics.c b/apps/codecs/libwmapro/libavutil/mathematics.c
new file mode 100644
index 0000000000..6c04db78af
--- /dev/null
+++ b/apps/codecs/libwmapro/libavutil/mathematics.c
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2005 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/**
+ * @file libavutil/mathematics.c
+ * miscellaneous math routines and tables
+ */
+#include <assert.h>
+#include <stdint.h>
+#include <limits.h>
+#include "mathematics.h"
+/* 256-entry square-root lookup table. The entries grow roughly like
+ * 16*sqrt(i) (entry 1 is 16, entry 4 is 32, entry 16 is 64) and top out
+ * at 255 so that every value fits in a uint8_t. */
+const uint8_t ff_sqrt_tab[256]={
+  0, 16, 23, 28, 32, 36, 40, 43, 46, 48, 51, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 77, 79, 80, 82, 84, 85, 87, 88, 90,
+ 91, 92, 94, 95, 96, 98, 99,100,102,103,104,105,107,108,109,110,111,112,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
+128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,144,145,146,147,148,149,150,151,151,152,153,154,155,156,156,
+157,158,159,160,160,161,162,163,164,164,165,166,167,168,168,169,170,171,171,172,173,174,174,175,176,176,177,178,179,179,180,181,
+182,182,183,184,184,185,186,186,187,188,188,189,190,190,191,192,192,193,194,194,195,196,196,197,198,198,199,200,200,201,202,202,
+203,204,204,205,205,206,207,207,208,208,209,210,210,211,212,212,213,213,214,215,215,216,216,217,218,218,219,219,220,220,221,222,
+222,223,223,224,224,225,226,226,227,227,228,228,229,230,230,231,231,232,232,233,233,234,235,235,236,236,237,237,238,238,239,239,
+240,240,241,242,242,243,243,244,244,245,245,246,246,247,247,248,248,249,249,250,250,251,251,252,252,253,253,254,254,255,255,255
+};
+/* 256-entry integer log2 table: entry i holds floor(log2(i)) for i >= 1
+ * (1 -> 0, 2..3 -> 1, 4..7 -> 2, ... 128..255 -> 7); entry 0 is stored as 0. */
+const uint8_t ff_log2_tab[256]={
+        0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+        5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+        6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+        6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+        7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+        7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+        7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+        7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
+};
+/* Bit-reversal table for bytes: entry i is i with its eight bits in the
+ * opposite order (0x01 -> 0x80, 0x02 -> 0x40, 0x03 -> 0xC0, ...). */
+const uint8_t av_reverse[256]={
+0x00,0x80,0x40,0xC0,0x20,0xA0,0x60,0xE0,0x10,0x90,0x50,0xD0,0x30,0xB0,0x70,0xF0,
+0x08,0x88,0x48,0xC8,0x28,0xA8,0x68,0xE8,0x18,0x98,0x58,0xD8,0x38,0xB8,0x78,0xF8,
+0x04,0x84,0x44,0xC4,0x24,0xA4,0x64,0xE4,0x14,0x94,0x54,0xD4,0x34,0xB4,0x74,0xF4,
+0x0C,0x8C,0x4C,0xCC,0x2C,0xAC,0x6C,0xEC,0x1C,0x9C,0x5C,0xDC,0x3C,0xBC,0x7C,0xFC,
+0x02,0x82,0x42,0xC2,0x22,0xA2,0x62,0xE2,0x12,0x92,0x52,0xD2,0x32,0xB2,0x72,0xF2,
+0x0A,0x8A,0x4A,0xCA,0x2A,0xAA,0x6A,0xEA,0x1A,0x9A,0x5A,0xDA,0x3A,0xBA,0x7A,0xFA,
+0x06,0x86,0x46,0xC6,0x26,0xA6,0x66,0xE6,0x16,0x96,0x56,0xD6,0x36,0xB6,0x76,0xF6,
+0x0E,0x8E,0x4E,0xCE,0x2E,0xAE,0x6E,0xEE,0x1E,0x9E,0x5E,0xDE,0x3E,0xBE,0x7E,0xFE,
+0x01,0x81,0x41,0xC1,0x21,0xA1,0x61,0xE1,0x11,0x91,0x51,0xD1,0x31,0xB1,0x71,0xF1,
+0x09,0x89,0x49,0xC9,0x29,0xA9,0x69,0xE9,0x19,0x99,0x59,0xD9,0x39,0xB9,0x79,0xF9,
+0x05,0x85,0x45,0xC5,0x25,0xA5,0x65,0xE5,0x15,0x95,0x55,0xD5,0x35,0xB5,0x75,0xF5,
+0x0D,0x8D,0x4D,0xCD,0x2D,0xAD,0x6D,0xED,0x1D,0x9D,0x5D,0xDD,0x3D,0xBD,0x7D,0xFD,
+0x03,0x83,0x43,0xC3,0x23,0xA3,0x63,0xE3,0x13,0x93,0x53,0xD3,0x33,0xB3,0x73,0xF3,
+0x0B,0x8B,0x4B,0xCB,0x2B,0xAB,0x6B,0xEB,0x1B,0x9B,0x5B,0xDB,0x3B,0xBB,0x7B,0xFB,
+0x07,0x87,0x47,0xC7,0x27,0xA7,0x67,0xE7,0x17,0x97,0x57,0xD7,0x37,0xB7,0x77,0xF7,
+0x0F,0x8F,0x4F,0xCF,0x2F,0xAF,0x6F,0xEF,0x1F,0x9F,0x5F,0xDF,0x3F,0xBF,0x7F,0xFF,
+};
+/* Greatest common divisor by Euclid's algorithm: keep replacing (a, b)
+ * with (b, a mod b) until the remainder is zero; the survivor is the gcd. */
+int64_t av_gcd(int64_t a, int64_t b){
+    while(b){
+        const int64_t rem= a%b;
+        a= b;
+        b= rem;
+    }
+    return a;
+}
+/**
+ * Computes a * b / c with the requested rounding, without letting the
+ * intermediate product overflow 64 bits.
+ *
+ * @param a   value to rescale; may be negative
+ * @param b   multiplier, asserted to be >= 0
+ * @param c   divisor, asserted to be > 0
+ * @param rnd rounding mode; asserted to be in 0..5 and different from 4
+ * @return the rescaled value. a == INT64_MIN is handled as -INT64_MAX,
+ *         because its magnitude cannot be represented as an int64_t.
+ */
+int64_t av_rescale_rnd(int64_t a, int64_t b, int64_t c, enum AVRounding rnd){
+    int64_t r=0;
+    assert(c > 0);
+    assert(b >=0);
+    assert(rnd >=0 && rnd<=5 && rnd!=4);
+    if(a<0){
+        /* Work on the magnitude and negate the result. Negating swaps
+         * "towards -inf" and "towards +inf", hence the flip of the low bit
+         * for modes 2 and 3. INT64_MIN has no positive counterpart, so it
+         * is clamped instead of being fed to the unsigned paths below. */
+        const int64_t magnitude= (a == INT64_MIN) ? INT64_MAX : -a;
+        return -av_rescale_rnd(magnitude, b, c, rnd ^ ((rnd>>1)&1));
+    }
+    /* r is the bias added before the truncating division:
+     * c/2 rounds to nearest, c-1 rounds up (odd-valued modes), 0 truncates. */
+    if(rnd==AV_ROUND_NEAR_INF) r= c/2;
+    else if(rnd&1)             r= c-1;
+    if(b<=INT_MAX && c<=INT_MAX){
+        if(a<=INT_MAX)
+            return (a * b + r)/c;
+        else
+            /* split a into quotient and remainder by c so that no partial
+             * product exceeds 64 bits */
+            return a/c*b + (a%c*b + r)/c;
+    }else{
+#if 1
+        /* 64x64 -> 128 bit schoolbook multiply on 32-bit halves;
+         * the product ends up in a1 (high word) : a0 (low word). */
+        uint64_t a0= a&0xFFFFFFFF;
+        uint64_t a1= a>>32;
+        uint64_t b0= b&0xFFFFFFFF;
+        uint64_t b1= b>>32;
+        uint64_t t1= a0*b1 + a1*b0;
+        uint64_t t1a= t1<<32;
+        int i;
+        a0 = a0*b0 + t1a;
+        a1 = a1*b1 + (t1>>32) + (a0<t1a); /* (a0<t1a) is the carry out of the low word */
+        a0 += r;
+        a1 += a0<r;                       /* carry from adding the rounding bias */
+        /* Bitwise long division of a1:a0 by c. a1 doubles as the running
+         * remainder; quotient bits are shifted into t1, whose previous
+         * contents are pushed out over the 64 iterations. */
+        for(i=63; i>=0; i--){
+//            int o= a1 & 0x8000000000000000ULL;
+            a1+= a1 + ((a0>>i)&1);
+            t1+=t1;
+            if(/*o || */c <= a1){
+                a1 -= c;
+                t1++;
+            }
+        }
+        return t1;
+    }
+#else
+        AVInteger ai;
+        ai= av_mul_i(av_int2i(a), av_int2i(b));
+        ai= av_add_i(ai, av_int2i(r));
+        return av_i2int(av_div_i(ai, av_int2i(c)));
+    }
+#endif
+}
+/* Convenience form of av_rescale_rnd() that always uses AV_ROUND_NEAR_INF. */
+int64_t av_rescale(int64_t a, int64_t b, int64_t c){
+    const enum AVRounding nearest= AV_ROUND_NEAR_INF;
+    return av_rescale_rnd(a, b, c, nearest);
+}
+/* Everything down to the matching "#endif" with the 0 marker is compiled out.
+ * av_rescale_q() and av_compare_ts() take AVRational arguments, and the
+ * self-test main() additionally relies on the AVInteger helpers from
+ * integer.h. */
+#if 0
+/* Rescales a by the ratio bq/cq, rounding to nearest. */
+int64_t av_rescale_q(int64_t a, AVRational bq, AVRational cq){
+    int64_t b= bq.num * (int64_t)cq.den;
+    int64_t c= cq.num * (int64_t)bq.den;
+    return av_rescale_rnd(a, b, c, AV_ROUND_NEAR_INF);
+}
+/* Three-way comparison of two timestamps expressed in different time bases:
+ * each one is converted (rounding down) into the other's base and compared. */
+int av_compare_ts(int64_t ts_a, AVRational tb_a, int64_t ts_b, AVRational tb_b){
+    int64_t a= tb_a.num * (int64_t)tb_b.den;
+    int64_t b= tb_b.num * (int64_t)tb_a.den;
+    if (av_rescale_rnd(ts_a, a, b, AV_ROUND_DOWN) < ts_b) return -1;
+    if (av_rescale_rnd(ts_b, b, a, AV_ROUND_DOWN) < ts_a) return  1;
+    return 0;
+}
+#ifdef TEST
+#include "integer.h"
+#undef printf
+/* Self-test: compares av_rescale() against an AVInteger reference over a
+ * sweep of a, b and c, printing every mismatch. */
+int main(void){
+    int64_t a,b,c,d,e;
+    for(a=7; a<(1LL<<62); a+=a/3+1){
+        for(b=3; b<(1LL<<62); b+=b/4+1){
+            for(c=9; c<(1LL<<62); c+=(c*2)/5+3){
+                int64_t r= c/2;
+                AVInteger ai;
+                ai= av_mul_i(av_int2i(a), av_int2i(b));
+                ai= av_add_i(ai, av_int2i(r));
+                d= av_i2int(av_div_i(ai, av_int2i(c)));
+                e= av_rescale(a,b,c);
+                /* skip combinations whose result would not fit in 63 bits */
+                if((double)a * (double)b / (double)c > (1LL<<63))
+                    continue;
+                if(d!=e) printf("%"PRId64"*%"PRId64"/%"PRId64"= %"PRId64"=%"PRId64"\n", a, b, c, d, e);
+            }
+        }
+    }
+    return 0;
+}
+#endif
+#endif /* 0 */
diff --git a/apps/codecs/libwmapro/libavutil/mathematics.h b/apps/codecs/libwmapro/libavutil/mathematics.h
new file mode 100644
index 0000000000..a09d3e9ad8
--- /dev/null
+++ b/apps/codecs/libwmapro/libavutil/mathematics.h
@@ -0,0 +1,98 @@
+/*
+ * copyright (c) 2005 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef AVUTIL_MATHEMATICS_H
+#define AVUTIL_MATHEMATICS_H
+#include <stdint.h>
+#include <math.h>
+#include "attributes.h"
+//#include "rational.h"
+#ifndef M_E
+#define M_E            2.7182818284590452354   /* e */
+#endif
+#ifndef M_LN2
+#define M_LN2          0.69314718055994530942  /* log_e 2 */
+#endif
+#ifndef M_LN10
+#define M_LN10         2.30258509299404568402  /* log_e 10 */
+#endif
+#ifndef M_LOG2_10
+#define M_LOG2_10      3.32192809488736234787  /* log_2 10 */
+#endif
+#ifndef M_PI
+#define M_PI           3.14159265358979323846  /* pi */
+#endif
+#ifndef M_SQRT1_2
+#define M_SQRT1_2      0.70710678118654752440  /* 1/sqrt(2) */
+#endif
+#ifndef M_SQRT2
+#define M_SQRT2        1.41421356237309504880  /* sqrt(2) */
+#endif
+#ifndef NAN
+#define NAN            (0.0/0.0)
+#endif
+#ifndef INFINITY
+#define INFINITY       (1.0/0.0)
+#endif
+/**
+ * Rounding modes for the rescaling functions declared below.
+ * No mode is assigned the value 4.
+ */
+enum AVRounding {
+    AV_ROUND_ZERO     = 0, ///< Round toward zero.
+    AV_ROUND_INF      = 1, ///< Round away from zero.
+    AV_ROUND_DOWN     = 2, ///< Round toward -infinity.
+    AV_ROUND_UP       = 3, ///< Round toward +infinity.
+    AV_ROUND_NEAR_INF = 5, ///< Round to nearest and halfway cases away from zero.
+};
+/**
+ * Returns the greatest common divisor of a and b.
+ * If both a and b are 0 or either or both are <0 then behavior is
+ * undefined.
+ */
+int64_t av_const av_gcd(int64_t a, int64_t b);
+/**
+ * Rescales a 64-bit integer with rounding to nearest.
+ * A simple a*b/c isn't possible as it can overflow.
+ */
+int64_t av_rescale(int64_t a, int64_t b, int64_t c) av_const;
+/**
+ * Rescales a 64-bit integer with specified rounding.
+ * A simple a*b/c isn't possible as it can overflow.
+ */
+int64_t av_rescale_rnd(int64_t a, int64_t b, int64_t c, enum AVRounding) av_const;
+/**
+ * Rescales a 64-bit integer by 2 rational numbers.
+ */
+//int64_t av_rescale_q(int64_t a, AVRational bq, AVRational cq) av_const;
+/**
+ * Compares 2 timestamps, each in its own timebase.
+ * The result of the function is undefined if one of the timestamps
+ * is outside the int64_t range when represented in the other's timebase.
+ * @return -1 if ts_a is before ts_b, 1 if ts_a is after ts_b or 0 if they represent the same position
+ */
+//int av_compare_ts(int64_t ts_a, AVRational tb_a, int64_t ts_b, AVRational tb_b);
+#endif /* AVUTIL_MATHEMATICS_H */
diff --git a/apps/codecs/libwmapro/libavutil/mem.c b/apps/codecs/libwmapro/libavutil/mem.c
new file mode 100644
index 0000000000..9721222e50
--- /dev/null
+++ b/apps/codecs/libwmapro/libavutil/mem.c
@@ -0,0 +1,159 @@
+/*
+ * default memory allocator for libavutil
+ * Copyright (c) 2002 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/**
+ * @file libavutil/mem.c
+ * default memory allocator for libavutil
+ */
+//#include "ffmpeg_config.h"
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#if HAVE_MALLOC_H
+#include <malloc.h>
+#endif
+#include "mem.h"
+/* here we can use OS-dependent allocation functions */
+#undef free
+#undef malloc
+#undef realloc
+/* You can redefine av_malloc and av_free in your project to use your
+   memory allocator. You do not need to suppress this file because the
+   linker will do it automatically. */
+/**
+ * Allocates size bytes.
+ * The underlying allocator is picked at compile time: the manual alignment
+ * hack (CONFIG_MEMALIGN_HACK), posix_memalign() (HAVE_POSIX_MEMALIGN),
+ * memalign() (HAVE_MEMALIGN), or plain malloc() when none of them is set.
+ * @param size number of bytes requested
+ * @return the new block, or NULL if size exceeds INT_MAX-16 or the
+ *         allocator fails
+ */
+void *av_malloc(unsigned int size)
+{
+    void *ptr = NULL;
+#if CONFIG_MEMALIGN_HACK
+    long diff;
+#endif
+    /* let's disallow possible ambiguous cases */
+    if(size > (INT_MAX-16) )
+        return NULL;
+#if CONFIG_MEMALIGN_HACK
+    /* Over-allocate by 16 bytes, move forward to the next 16-byte boundary
+       (diff is always in 1..16) and record diff in the byte just before the
+       returned pointer so av_free()/av_realloc() can find the real block. */
+    ptr = malloc(size+16);
+    if(!ptr)
+        return ptr;
+    diff= ((-(long)ptr - 1)&15) + 1;
+    ptr = (char*)ptr + diff;
+    ((char*)ptr)[-1]= diff;
+#elif HAVE_POSIX_MEMALIGN
+    if (posix_memalign(&ptr,16,size))
+        ptr = NULL;
+#elif HAVE_MEMALIGN
+    ptr = memalign(16,size);
+    /* Why 64?
+       Indeed, we should align it:
+         on 4 for 386
+         on 16 for 486
+         on 32 for 586, PPro - K6-III
+         on 64 for K7 (maybe for P3 too).
+       Because L1 and L2 caches are aligned on those values.
+       But I don't want to code such logic here!
+     */
+     /* Why 16?
+        Because some CPUs need alignment, for example SSE2 on P4, & most RISC CPUs
+        it will just trigger an exception and the unaligned load will be done in the
+        exception handler or it will just segfault (SSE2 on P4).
+        Why not larger? Because I did not see a difference in benchmarks ...
+     */
+     /* benchmarks with P3
+        memalign(64)+1          3071,3051,3032
+        memalign(64)+2          3051,3032,3041
+        memalign(64)+4          2911,2896,2915
+        memalign(64)+8          2545,2554,2550
+        memalign(64)+16         2543,2572,2563
+        memalign(64)+32         2546,2545,2571
+        memalign(64)+64         2570,2533,2558
+        BTW, malloc seems to do 8-byte alignment by default here.
+     */
+#else
+    /* no aligned allocator configured: alignment is whatever malloc() gives */
+    ptr = malloc(size);
+#endif
+    return ptr;
+}
+/**
+ * Resizes a block obtained from av_malloc()/av_realloc().
+ * @param ptr  block to resize, or NULL to allocate a fresh one
+ * @param size new size in bytes
+ * @return the resized block, or NULL if size exceeds INT_MAX-16 or the
+ *         underlying realloc() fails (the old block is then left untouched)
+ */
+void *av_realloc(void *ptr, unsigned int size)
+{
+#if CONFIG_MEMALIGN_HACK
+    int diff;
+#endif
+    /* let's disallow possible ambiguous cases */
+    if(size > (INT_MAX-16) )
+        return NULL;
+#if CONFIG_MEMALIGN_HACK
+    //FIXME this isn't aligned correctly, though it probably isn't needed
+    if(!ptr) return av_malloc(size);
+    /* the byte before the user pointer holds the offset to the real block */
+    diff= ((char*)ptr)[-1];
+    ptr= realloc((char*)ptr - diff, size + diff);
+    /* only re-apply the offset on success: NULL + diff would look like a
+       valid pointer to the caller and hide the failure */
+    if(ptr)
+        ptr= (char*)ptr + diff;
+    return ptr;
+#else
+    return realloc(ptr, size);
+#endif
+}
+/* Releases a block obtained from av_malloc()/av_realloc(); NULL is ignored. */
+void av_free(void *ptr)
+{
+    /* XXX: this test should not be needed on most libcs */
+    if (!ptr)
+        return;
+#if CONFIG_MEMALIGN_HACK
+    {
+        /* step back by the offset stored in the byte before the pointer */
+        char *aligned= ptr;
+        free(aligned - aligned[-1]);
+    }
+#else
+    free(ptr);
+#endif
+}
+/* Frees the block whose address is stored at *arg and then clears that
+ * stored address. arg is really a pointer to a pointer; it is declared
+ * void* so any pointer-to-pointer can be passed without a cast. */
+void av_freep(void *arg)
+{
+    void **slot= arg;
+    av_free(*slot);
+    *slot = NULL;
+}
+/* av_malloc() followed by zero-filling; returns NULL when av_malloc() does. */
+void *av_mallocz(unsigned int size)
+{
+    void *block = av_malloc(size);
+    if (!block)
+        return NULL;
+    memset(block, 0, size);
+    return block;
+}
+/**
+ * Duplicates the string s into a block obtained from av_malloc().
+ * @param s string to copy; NULL is allowed
+ * @return the copy, or NULL if s is NULL, the string (with terminator) is
+ *         larger than av_malloc() accepts, or the allocation fails
+ */
+char *av_strdup(const char *s)
+{
+    char *ptr= NULL;
+    if(s){
+        /* keep the length in size_t: storing it in an int would wrap for
+           very long strings and lead to an undersized allocation */
+        size_t len = strlen(s) + 1;
+        if(len > (size_t)(INT_MAX-16))
+            return NULL;
+        ptr = av_malloc((unsigned int)len);
+        if (ptr)
+            memcpy(ptr, s, len);
+    }
+    return ptr;
+}
diff --git a/apps/codecs/libwmapro/libavutil/mem.h b/apps/codecs/libwmapro/libavutil/mem.h
new file mode 100644
index 0000000000..e50553aefe
--- /dev/null
+++ b/apps/codecs/libwmapro/libavutil/mem.h
@@ -0,0 +1,104 @@
+/*
+ * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/**
+ * @file libavutil/mem.h
+ * memory handling functions
+ */
+#ifndef AVUTIL_MEM_H
+#define AVUTIL_MEM_H
+#include "common.h"
+#if AV_GCC_VERSION_AT_LEAST(3,1)
+    #define av_malloc_attrib __attribute__((__malloc__))
+#else
+    #define av_malloc_attrib
+#endif
+#if (!defined(__ICC) || __ICC > 1100) && AV_GCC_VERSION_AT_LEAST(4,3)
+    #define av_alloc_size(n) __attribute__((alloc_size(n)))
+#else
+    #define av_alloc_size(n)
+#endif
+/**
+ * Allocates a block of \p size bytes with alignment suitable for all
+ * memory accesses (including vectors if available on the CPU).
+ * @param size Size in bytes for the memory block to be allocated.
+ * @return Pointer to the allocated block, NULL if the block cannot
+ * be allocated.
+ * @see av_mallocz()
+ */
+void *av_malloc(unsigned int size) av_malloc_attrib av_alloc_size(1);
+/**
+ * Allocates or reallocates a block of memory.
+ * If \p ptr is NULL and \p size > 0, allocates a new block. If \p
+ * size is zero, frees the memory block pointed to by \p ptr.
+ * @param size Size in bytes for the memory block to be allocated or
+ * reallocated.
+ * @param ptr Pointer to a memory block already allocated with
+ * av_malloc(z)() or av_realloc() or NULL.
+ * @return Pointer to a newly reallocated block or NULL if the block
+ * cannot be reallocated or the function is used to free the memory block.
+ * @see av_fast_realloc()
+ */
+void *av_realloc(void *ptr, unsigned int size) av_alloc_size(2);
+/**
+ * Frees a memory block which has been allocated with av_malloc(z)() or
+ * av_realloc().
+ * @param ptr Pointer to the memory block which should be freed.
+ * @note ptr = NULL is explicitly allowed.
+ * @note It is recommended that you use av_freep() instead.
+ * @see av_freep()
+ */
+void av_free(void *ptr);
+/**
+ * Allocates a block of \p size bytes with alignment suitable for all
+ * memory accesses (including vectors if available on the CPU) and
+ * zeroes all the bytes of the block.
+ * @param size Size in bytes for the memory block to be allocated.
+ * @return Pointer to the allocated block, NULL if it cannot be allocated.
+ * @see av_malloc()
+ */
+void *av_mallocz(unsigned int size) av_malloc_attrib av_alloc_size(1);
+/**
+ * Duplicates the string \p s.
+ * @param s string to be duplicated
+ * @return Pointer to a newly allocated string containing a
+ * copy of \p s or NULL if the string cannot be allocated.
+ */
+char *av_strdup(const char *s) av_malloc_attrib;
+/**
+ * Frees a memory block which has been allocated with av_malloc(z)() or
+ * av_realloc() and set the pointer pointing to it to NULL.
+ * @param ptr Pointer to the pointer to the memory block which should
+ * be freed.
+ * @see av_free()
+ */
+void av_freep(void *ptr);
+#endif /* AVUTIL_MEM_H */
diff --git a/apps/codecs/libwmapro/mathops.h b/apps/codecs/libwmapro/mathops.h
new file mode 100644
index 0000000000..149910bb1d
--- /dev/null
+++ b/apps/codecs/libwmapro/mathops.h
@@ -0,0 +1,150 @@
+/*
+ * simple math operations
+ * Copyright (c) 2001, 2002 Fabrice Bellard
+ * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef AVCODEC_MATHOPS_H
+#define AVCODEC_MATHOPS_H
+#include "libavutil/common.h"
+#if   ARCH_ARM
+#   include "arm/mathops.h"
+#elif ARCH_AVR32
+#   include "avr32/mathops.h"
+#elif ARCH_BFIN
+#   include "bfin/mathops.h"
+#elif ARCH_MIPS
+#   include "mips/mathops.h"
+#elif ARCH_PPC
+#   include "ppc/mathops.h"
+#elif ARCH_X86
+#   include "x86/mathops.h"
+#endif
+/* generic implementation */
+#ifndef MULL
+/* Full 64-bit signed product of a and b, shifted right by s bits. */
+#   define MULL(a,b,s) (((int64_t)(a) * (int64_t)(b)) >> (s))
+#endif
+#ifndef MULH
+/* Upper 32 bits of the signed 64-bit product a*b. This is a function and
+ * not a macro because gcc 3.4 turned the macro form into very bloated code. */
+static av_always_inline int MULH(int a, int b){
+    const int64_t product = (int64_t)a * (int64_t)b;
+    return product >> 32;
+}
+#endif
+#ifndef UMULH
+/* Upper 32 bits of the unsigned 64-bit product a*b. */
+static av_always_inline unsigned UMULH(unsigned a, unsigned b){
+    const uint64_t product = (uint64_t)a * (uint64_t)b;
+    return product >> 32;
+}
+#endif
+#ifndef MUL64
+/* signed multiply with both operands widened to 64 bits first */
+#   define MUL64(a,b) ((int64_t)(a) * (int64_t)(b))
+#endif
+#ifndef MAC64
+/* multiply-accumulate: d += a*b, using the 64-bit product */
+#   define MAC64(d, a, b) ((d) += MUL64(a, b))
+#endif
+#ifndef MLS64
+/* multiply-subtract: d -= a*b, using the 64-bit product */
+#   define MLS64(d, a, b) ((d) -= MUL64(a, b))
+#endif
+/* signed 16x16 -> 32 multiply add accumulate.
+ * The expansion is fully parenthesised, like MLS16 and MAC64, so the macro
+ * can safely appear inside a larger expression. */
+#ifndef MAC16
+#   define MAC16(rt, ra, rb) ((rt) += (ra) * (rb))
+#endif
+/* signed 16x16 -> 32 multiply */
+#ifndef MUL16
+#   define MUL16(ra, rb) ((ra) * (rb))
+#endif
+#ifndef MLS16
+/* multiply-subtract: rt -= ra*rb */
+#   define MLS16(rt, ra, rb) ((rt) -= (ra) * (rb))
+#endif
+/* median of 3 */
+#ifndef mid_pred
+#define mid_pred mid_pred
+static inline av_const int mid_pred(int a, int b, int c)
+{
+    /* Sort a and b into [lo, hi]; the median is then c clamped to that range. */
+    int lo = a;
+    int hi = b;
+    if (a > b) {
+        lo = b;
+        hi = a;
+    }
+    if (c > hi)
+        return hi;
+    if (c < lo)
+        return lo;
+    return c;
+}
+#endif
+#ifndef sign_extend
+/**
+ * Sign-extends the low `bits` bits of val to a full int.
+ * The left shift is done on an unsigned copy, because left-shifting a
+ * negative (or overflowing) signed value is undefined behaviour in C; the
+ * right shift is still performed on a signed int so the sign bit is
+ * propagated.
+ * @param bits width of the field; must be at least 1 and at most the width
+ *             of int (a shift by the full width would be undefined)
+ */
+static inline av_const int sign_extend(int val, unsigned bits)
+{
+    const unsigned shift = INT_BIT - bits;
+    return (int)((unsigned)val << shift) >> shift;
+}
+#endif
+#ifndef zero_extend
+/* Keeps the low `bits` bits of val and clears everything above them by
+ * shifting the unwanted high bits out and back in as zeros. */
+static inline av_const unsigned zero_extend(unsigned val, unsigned bits)
+{
+    const unsigned drop = INT_BIT - bits;
+    return (val << drop) >> drop;
+}
+#endif
+#ifndef COPY3_IF_LT
+/* If y < x, performs the three assignments x = y, a = b and c = d.
+ * NOTE(review): this expands to a bare if-block, not do { } while (0), so an
+ * "else" written right after an invocation would attach to this if. */
+#define COPY3_IF_LT(x, y, a, b, c, d)\
+if ((y) < (x)) {\
+    (x) = (y);\
+    (a) = (b);\
+    (c) = (d);\
+}
+#endif
+#ifndef NEG_SSR32
+/* right shift of a, taken as int32_t, by (32 - s) bits */
+#   define NEG_SSR32(a,s) ((( int32_t)(a))>>(32-(s)))
+#endif
+#ifndef NEG_USR32
+/* right shift of a, taken as uint32_t, by (32 - s) bits */
+#   define NEG_USR32(a,s) (((uint32_t)(a))>>(32-(s)))
+#endif
+#endif /* AVCODEC_MATHOPS_H */
diff --git a/apps/codecs/libwmapro/mdct.c b/apps/codecs/libwmapro/mdct.c
new file mode 100644
index 0000000000..9d0a59dc84
--- /dev/null
+++ b/apps/codecs/libwmapro/mdct.c
@@ -0,0 +1,232 @@
+/*
+ * MDCT/IMDCT transforms
+ * Copyright (c) 2002 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include <stdlib.h>
+#include <string.h>
+#include "libavutil/common.h"
+#include "libavutil/mathematics.h"
+#include "fft.h"
+/**
+ * @file libavcodec/mdct.c
+ * MDCT/IMDCT transforms.
+ */
+// Generate a Kaiser-Bessel Derived Window.
+#define BESSEL_I0_ITER 50 // default: 50 iterations of Bessel I0 approximation
+/*
+ * Fills window[0..n-1] with a Kaiser-Bessel derived window of shape alpha.
+ * For every tap a Bessel I0 approximation (BESSEL_I0_ITER Horner steps) is
+ * evaluated and accumulated; each tap is the square root of its running sum
+ * divided by the final sum plus one.
+ */
+av_cold void ff_kbd_window_init(float *window, float alpha, int n)
+{
+   double running[n];   /* cumulative Bessel sums, one per tap */
+   const double alpha_sq = (alpha * M_PI / n) * (alpha * M_PI / n);
+   double total = 0.0;
+   int tap;
+   for (tap = 0; tap < n; tap++) {
+       const double x = tap * (n - tap) * alpha_sq;
+       double i0 = 1.0;
+       int step = BESSEL_I0_ITER;
+       while (step > 0) {
+           i0 = i0 * x / (step * step) + 1;
+           step--;
+       }
+       total += i0;
+       running[tap] = total;
+   }
+   total++;
+   for (tap = 0; tap < n; tap++)
+       window[tap] = sqrt(running[tap] / total);
+}
+#include "mdct_tablegen.h"
+/**
+ * Prepares an FFTContext for MDCT/IMDCT of size 2^nbits.
+ * The context is cleared, the underlying FFT (of size 2^(nbits-2)) is
+ * initialised, and the twiddle tables tcos/tsin are allocated and filled.
+ * A negative scale adds a quarter-size offset to the twiddle phase; the
+ * tables are scaled by sqrt(|scale|).
+ * @return 0 on success, -1 on failure (the context is cleaned up first)
+ */
+av_cold int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale)
+{
+    const int size    = 1 << nbits;
+    const int quarter = size >> 2;
+    double phase, amp;
+    int stride, k;
+    memset(s, 0, sizeof(*s));
+    s->mdct_bits = nbits;
+    s->mdct_size = size;
+    s->permutation = FF_MDCT_PERM_NONE;
+    if (ff_fft_init(s, s->mdct_bits - 2, inverse) < 0)
+        goto fail;
+    /* one allocation holds both tables: size/4 cosines and size/4 sines */
+    s->tcos = av_malloc(size/2 * sizeof(FFTSample));
+    if (!s->tcos)
+        goto fail;
+    if (s->permutation == FF_MDCT_PERM_NONE) {
+        /* sines stored after the cosines */
+        s->tsin = s->tcos + quarter;
+        stride = 1;
+    } else if (s->permutation == FF_MDCT_PERM_INTERLEAVE) {
+        /* cosines and sines alternate */
+        s->tsin = s->tcos + 1;
+        stride = 2;
+    } else {
+        goto fail;
+    }
+    phase = 1.0 / 8.0 + (scale < 0 ? quarter : 0);
+    amp = sqrt(fabs(scale));
+    for (k = 0; k < quarter; k++) {
+        const double angle = 2 * M_PI * (k + phase) / size;
+        s->tcos[k*stride] = -cos(angle) * amp;
+        s->tsin[k*stride] = -sin(angle) * amp;
+    }
+    return 0;
+ fail:
+    ff_mdct_end(s);
+    return -1;
+}
+/* complex multiplication: p = a * b
+ * (pre + i*pim) = (are + i*aim) * (bre + i*bim).
+ * All inputs are copied into temporaries first, so the outputs may alias
+ * the inputs. The body is wrapped in do { } while (0) so that the macro
+ * followed by ';' is exactly one statement, also in front of an else. */
+#define CMUL(pre, pim, are, aim, bre, bim) \
+do {\
+    FFTSample _are = (are);\
+    FFTSample _aim = (aim);\
+    FFTSample _bre = (bre);\
+    FFTSample _bim = (bim);\
+    (pre) = _are * _bre - _aim * _bim;\
+    (pim) = _are * _bim + _aim * _bre;\
+} while (0)
+/**
+ * Compute the middle half of the inverse MDCT of size N = 2^nbits,
+ * thus excluding the parts that can be derived by symmetry
+ * The output buffer is used as the working array of N/4 complex values for
+ * the in-place FFT.
+ * @param output N/2 samples
+ * @param input N/2 samples
+ */
+void ff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input)
+{
+    int k, n8, n4, n2, n, j;
+    const uint16_t *revtab = s->revtab;
+    const FFTSample *tcos = s->tcos;
+    const FFTSample *tsin = s->tsin;
+    const FFTSample *in1, *in2;
+    FFTComplex *z = (FFTComplex *)output;
+    n = 1 << s->mdct_bits;
+    n2 = n >> 1;
+    n4 = n >> 2;
+    n8 = n >> 3;
+    /* pre rotation */
+    /* in1 walks forward from the first input sample and in2 backward from
+       the last one, both in steps of two; each pair is rotated by
+       (tcos[k], tsin[k]) and stored at the index given by revtab[k] */
+    in1 = input;
+    in2 = input + n2 - 1;
+    for(k = 0; k < n4; k++) {
+        j=revtab[k];
+        CMUL(z[j].re, z[j].im, *in2, *in1, tcos[k], tsin[k]);
+        in1 += 2;
+        in2 -= 2;
+    }
+    ff_fft_calc(s, z);
+    /* post rotation + reordering */
+    /* works outward from the middle (index n8) on two mirrored entries at a
+       time; both products are computed into temporaries before anything is
+       written back, and the imaginary results are stored crosswise (i0 goes
+       to the lower entry, i1 to the upper one) */
+    for(k = 0; k < n8; k++) {
+        FFTSample r0, i0, r1, i1;
+        CMUL(r0, i1, z[n8-k-1].im, z[n8-k-1].re, tsin[n8-k-1], tcos[n8-k-1]);
+        CMUL(r1, i0, z[n8+k  ].im, z[n8+k  ].re, tsin[n8+k  ], tcos[n8+k  ]);
+        z[n8-k-1].re = r0;
+        z[n8-k-1].im = i0;
+        z[n8+k  ].re = r1;
+        z[n8+k  ].im = i1;
+    }
+}
+/**
+ * Full inverse MDCT of size N = 2^nbits.
+ * The middle N/2 samples are produced by ff_imdct_half_c() directly into
+ * output[N/4 .. 3N/4-1]; the outer quarters are then filled by mirroring:
+ * the first quarter is the negated, reversed second quarter and the last
+ * quarter is the reversed third quarter.
+ * @param output N samples
+ * @param input N/2 samples
+ */
+void ff_imdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input)
+{
+    const int size    = 1 << s->mdct_bits;
+    const int half    = size >> 1;
+    const int quarter = size >> 2;
+    int i;
+    ff_imdct_half_c(s, output + quarter, input);
+    for (i = 0; i < quarter; i++) {
+        output[i]            = -output[half - i - 1];
+        output[size - i - 1] =  output[half + i];
+    }
+}
+/**
+ * Compute MDCT of size N = 2^nbits
+ * The out buffer is used as the working array of N/4 complex values for
+ * the in-place FFT.
+ * @param input N samples
+ * @param out N/2 samples
+ */
+void ff_mdct_calc_c(FFTContext *s, FFTSample *out, const FFTSample *input)
+{
+    int i, j, n, n8, n4, n2, n3;
+    FFTSample re, im;
+    const uint16_t *revtab = s->revtab;
+    const FFTSample *tcos = s->tcos;
+    const FFTSample *tsin = s->tsin;
+    FFTComplex *x = (FFTComplex *)out;
+    n = 1 << s->mdct_bits;
+    n2 = n >> 1;
+    n4 = n >> 2;
+    n8 = n >> 3;
+    n3 = 3 * n4;
+    /* pre rotation */
+    /* each iteration folds eight input samples into two complex values,
+       rotates them by (-tcos, tsin) and stores them at the positions given
+       by revtab[i] and revtab[n8 + i] */
+    for(i=0;i<n8;i++) {
+        re = -input[2*i+3*n4] - input[n3-1-2*i];
+        im = -input[n4+2*i] + input[n4-1-2*i];
+        j = revtab[i];
+        CMUL(x[j].re, x[j].im, re, im, -tcos[i], tsin[i]);
+        re = input[2*i] - input[n2-1-2*i];
+        im = -(input[n2+2*i] + input[n-1-2*i]);
+        j = revtab[n8 + i];
+        CMUL(x[j].re, x[j].im, re, im, -tcos[n8 + i], tsin[n8 + i]);
+    }
+    ff_fft_calc(s, x);
+    /* post rotation */
+    /* works outward from the middle (index n8) on two mirrored entries at a
+       time; results go through temporaries and the imaginary parts are
+       stored crosswise (i0 to the lower entry, i1 to the upper one) */
+    for(i=0;i<n8;i++) {
+        FFTSample r0, i0, r1, i1;
+        CMUL(i1, r0, x[n8-i-1].re, x[n8-i-1].im, -tsin[n8-i-1], -tcos[n8-i-1]);
+        CMUL(i0, r1, x[n8+i  ].re, x[n8+i  ].im, -tsin[n8+i  ], -tcos[n8+i  ]);
+        x[n8-i-1].re = r0;
+        x[n8-i-1].im = i0;
+        x[n8+i  ].re = r1;
+        x[n8+i  ].im = i1;
+    }
+}
+/* Releases what ff_mdct_init() set up: the twiddle table and the FFT state.
+ * Only tcos is freed; ff_mdct_init() points tsin into the same allocation. */
+av_cold void ff_mdct_end(FFTContext *s)
+{
+    av_freep(&s->tcos);
+    ff_fft_end(s);
+}
diff --git a/apps/codecs/libwmapro/mdct_tablegen.h b/apps/codecs/libwmapro/mdct_tablegen.h
new file mode 100644
index 0000000000..c4c3f95618
--- /dev/null
+++ b/apps/codecs/libwmapro/mdct_tablegen.h
@@ -0,0 +1,60 @@
+/*
+ * Header file for hardcoded MDCT tables
+ *
+ * Copyright (c) 2009 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include <assert.h>
+// do not use libavutil/mathematics.h since this is compiled both
+// for the host and the target and config.h is only valid for the target
+#include <math.h>
+//#include "../libavutil/attributes.h"
+#if !CONFIG_HARDCODED_TABLES
+SINETABLE(  32);
+SINETABLE(  64);
+SINETABLE( 128);
+SINETABLE( 256);
+SINETABLE( 512);
+SINETABLE(1024);
+SINETABLE(2048);
+SINETABLE(4096);
+#else
+#include "libavcodec/mdct_tables.h"
+#endif
+/* Sine window tables indexed by log2 of the window length: slot 5 is the
+ * 32-point table, slot 12 the 4096-point one. Slots 0-4 are NULL. */
+SINETABLE_CONST float * const ff_sine_windows[] = {
+    NULL, NULL, NULL, NULL, NULL, // unused
+    ff_sine_32 , ff_sine_64 ,
+    ff_sine_128, ff_sine_256, ff_sine_512, ff_sine_1024, ff_sine_2048, ff_sine_4096
+};
+// Fill window[0..n-1] with the rising quarter of a sine wave,
+// sampled at the half-integer positions (k + 0.5).
+av_cold void ff_sine_window_init(float *window, int n) {
+    int k = 0;
+    while (k < n) {
+        window[k] = sinf((k + 0.5) * (M_PI / (2.0 * n)));
+        k++;
+    }
+}
+/**
+ * Generates the sine window stored in slot `index` of ff_sine_windows
+ * (length 1 << index). Does nothing when the tables are hardcoded.
+ * @param index log2 of the window length; asserted to be inside the table
+ */
+av_cold void ff_init_ff_sine_windows(int index) {
+    assert(index >= 0 && index < FF_ARRAY_ELEMS(ff_sine_windows));
+#if !CONFIG_HARDCODED_TABLES
+    /* the first slots of ff_sine_windows are NULL placeholders; writing
+       through them would be a NULL dereference, so skip them */
+    if (ff_sine_windows[index])
+        ff_sine_window_init(ff_sine_windows[index], 1 << index);
+#endif
+}
diff --git a/apps/codecs/libwmapro/put_bits.h b/apps/codecs/libwmapro/put_bits.h
new file mode 100644
index 0000000000..c0139661a5
--- /dev/null
+++ b/apps/codecs/libwmapro/put_bits.h
@@ -0,0 +1,343 @@
+/*
+ * copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/**
+ * @file libavcodec/put_bits.h
+ * bitstream writer API
+ */
+#ifndef AVCODEC_PUT_BITS_H
+#define AVCODEC_PUT_BITS_H
+#include <stdint.h>
+#include <stdlib.h>
+#include <assert.h>
+#include "libavutil/bswap.h"
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/log.h"
+#include "mathops.h"
+//#define ALT_BITSTREAM_WRITER
+//#define ALIGNED_BITSTREAM_WRITER
+/* Bitstream writer state. buf and buf_end must be present and used by every alternative writer. */
+typedef struct PutBitContext {
+#ifdef ALT_BITSTREAM_WRITER
+    uint8_t *buf, *buf_end; /* start of the output buffer and one past its end */
+    int index;              /* number of bits written so far */
+#else
+    uint32_t bit_buf;       /* bits accumulated but not yet stored to the buffer */
+    int bit_left;           /* free bits remaining in bit_buf (32 when it is empty) */
+    uint8_t *buf, *buf_ptr, *buf_end; /* buffer start, next byte to be written, one past the end */
+#endif
+    int size_in_bits;       /* 8 * buffer size in bytes, set by init_put_bits() */
+} PutBitContext;
+/**
+ * Initializes the PutBitContext s.
+ * A negative buffer_size is treated as an empty buffer (buffer is replaced by NULL).
+ * @param buffer the buffer where to put bits
+ * @param buffer_size the size in bytes of buffer
+ */
+static inline void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size)
+{
+    if(buffer_size < 0) {
+        buffer_size = 0;
+        buffer = NULL;
+    }
+    s->size_in_bits= 8*buffer_size;
+    s->buf = buffer;
+    s->buf_end = s->buf + buffer_size;
+#ifdef ALT_BITSTREAM_WRITER
+    s->index=0;
+    ((uint32_t*)(s->buf))[0]=0; /* NOTE(review): stores 4 bytes even if the buffer is NULL or shorter than 4 bytes */
+//    memset(buffer, 0, buffer_size);
+#else
+    s->buf_ptr = s->buf;
+    s->bit_left=32; /* accumulator starts empty: all 32 bits are free */
+    s->bit_buf=0;
+#endif
+}
+/**
+ * Returns the total number of bits written to the bitstream, including bits still held in the accumulator.
+ */
+static inline int put_bits_count(PutBitContext *s)
+{
+#ifdef ALT_BITSTREAM_WRITER
+    return s->index;
+#else
+    return (s->buf_ptr - s->buf) * 8 + 32 - s->bit_left; /* bytes already stored plus bits pending in bit_buf */
+#endif
+}
+/**
+ * Stores the bits still pending in the accumulator, padding the end of the output stream with zeros.
+ */
+static inline void flush_put_bits(PutBitContext *s)
+{
+#ifdef ALT_BITSTREAM_WRITER
+    align_put_bits(s);
+#else
+#ifndef BITSTREAM_WRITER_LE
+    s->bit_buf<<= s->bit_left; /* move the pending bits to the top; the vacated low bits are zero */
+#endif
+    while (s->bit_left < 32) { /* one byte per iteration until the accumulator is empty */
+        /* XXX: should test end of buffer (nothing here compares buf_ptr with buf_end) */
+#ifdef BITSTREAM_WRITER_LE
+        *s->buf_ptr++=s->bit_buf;
+        s->bit_buf>>=8;
+#else
+        *s->buf_ptr++=s->bit_buf >> 24;
+        s->bit_buf<<=8;
+#endif
+        s->bit_left+=8;
+    }
+    s->bit_left=32; /* reset to the empty state used by init_put_bits() */
+    s->bit_buf=0;
+#endif
+}
+#if defined(ALT_BITSTREAM_WRITER) || defined(BITSTREAM_WRITER_LE)
+#define align_put_bits align_put_bits_unsupported_here
+#define ff_put_string ff_put_string_unsupported_here
+#define ff_copy_bits ff_copy_bits_unsupported_here
+#else
+/**
+ * Pads the bitstream with zeros up to the next byte boundary.
+ */
+void align_put_bits(PutBitContext *s);
+/**
+ * Writes the contents of the string argument to the bitstream.
+ *
+ * @param terminate_string 0-terminates the written string if value is 1
+ */
+void ff_put_string(PutBitContext *pb, const char *string, int terminate_string);
+/**
+ * Copies the content of src to the bitstream.
+ *
+ * @param length the number of bits of src to copy
+ */
+void ff_copy_bits(PutBitContext *pb, const uint8_t *src, int length);
+#endif
+/**
+ * Writes up to 31 bits into a bitstream.
+ * Use put_bits32 to write 32 bits. value must be smaller than 2^n (asserted in the default writer).
+ */
+static inline void put_bits(PutBitContext *s, int n, unsigned int value)
+#ifndef ALT_BITSTREAM_WRITER
+{
+    unsigned int bit_buf;
+    int bit_left;
+    //    printf("put_bits=%d %x\n", n, value);
+    assert(n <= 31 && value < (1U << n));
+    bit_buf = s->bit_buf; /* work on local copies, written back at the end */
+    bit_left = s->bit_left;
+    //    printf("n=%d value=%x cnt=%d buf=%x\n", n, value, bit_cnt, bit_buf);
+    /* XXX: optimize */
+#ifdef BITSTREAM_WRITER_LE
+    bit_buf |= value << (32 - bit_left);
+    if (n >= bit_left) { /* accumulator is full: store 32 bits and keep what did not fit */
+#if !HAVE_FAST_UNALIGNED
+        if (3 & (intptr_t) s->buf_ptr) { /* buf_ptr is not 4-byte aligned */
+            AV_WL32(s->buf_ptr, bit_buf);
+        } else
+#endif
+        *(uint32_t *)s->buf_ptr = le2me_32(bit_buf);
+        s->buf_ptr+=4;
+        bit_buf = (bit_left==32)?0:value >> bit_left;
+        bit_left+=32;
+    }
+    bit_left-=n;
+#else
+    if (n < bit_left) { /* value fits into the free part of the accumulator */
+        bit_buf = (bit_buf<<n) | value;
+        bit_left-=n;
+    } else { /* accumulator fills up: store 32 bits, the rest of value stays pending */
+        bit_buf<<=bit_left;
+        bit_buf |= value >> (n - bit_left);
+#if !HAVE_FAST_UNALIGNED
+        if (3 & (intptr_t) s->buf_ptr) { /* buf_ptr is not 4-byte aligned */
+            AV_WB32(s->buf_ptr, bit_buf);
+        } else
+#endif
+        *(uint32_t *)s->buf_ptr = be2me_32(bit_buf);
+        //printf("bitbuf = %08x\n", bit_buf);
+        s->buf_ptr+=4;
+        bit_left+=32 - n;
+        bit_buf = value;
+    }
+#endif
+    s->bit_buf = bit_buf;
+    s->bit_left = bit_left;
+}
+#else  /* ALT_BITSTREAM_WRITER defined */
+{
+#    ifdef ALIGNED_BITSTREAM_WRITER
+#        if ARCH_X86
+    __asm__ volatile(
+        "movl %0, %%ecx                 \n\t"
+        "xorl %%eax, %%eax              \n\t"
+        "shrdl %%cl, %1, %%eax          \n\t"
+        "shrl %%cl, %1                  \n\t"
+        "movl %0, %%ecx                 \n\t"
+        "shrl $3, %%ecx                 \n\t"
+        "andl $0xFFFFFFFC, %%ecx        \n\t"
+        "bswapl %1                      \n\t"
+        "orl %1, (%2, %%ecx)            \n\t"
+        "bswapl %%eax                   \n\t"
+        "addl %3, %0                    \n\t"
+        "movl %%eax, 4(%2, %%ecx)       \n\t"
+        : "=&r" (s->index), "=&r" (value)
+        : "r" (s->buf), "r" (n), "0" (s->index), "1" (value<<(-n))
+        : "%eax", "%ecx"
+    );
+#        else
+    int index= s->index;
+    uint32_t *ptr= ((uint32_t *)s->buf)+(index>>5); /* 32-bit word containing bit position index */
+    value<<= 32-n;
+    ptr[0] |= be2me_32(value>>(index&31));
+    ptr[1]  = be2me_32(value<<(32-(index&31)));
+//if(n>24) printf("%d %d\n", n, value);
+    index+= n;
+    s->index= index;
+#        endif
+#    else //ALIGNED_BITSTREAM_WRITER
+#        if ARCH_X86
+    __asm__ volatile(
+        "movl $7, %%ecx                 \n\t"
+        "andl %0, %%ecx                 \n\t"
+        "addl %3, %%ecx                 \n\t"
+        "negl %%ecx                     \n\t"
+        "shll %%cl, %1                  \n\t"
+        "bswapl %1                      \n\t"
+        "movl %0, %%ecx                 \n\t"
+        "shrl $3, %%ecx                 \n\t"
+        "orl %1, (%%ecx, %2)            \n\t"
+        "addl %3, %0                    \n\t"
+        "movl $0, 4(%%ecx, %2)          \n\t"
+        : "=&r" (s->index), "=&r" (value)
+        : "r" (s->buf), "r" (n), "0" (s->index), "1" (value)
+        : "%ecx"
+    );
+#        else
+    int index= s->index;
+    uint32_t *ptr= (uint32_t*)(((uint8_t *)s->buf)+(index>>3)); /* byte containing bit position index */
+    ptr[0] |= be2me_32(value<<(32-n-(index&7) ));
+    ptr[1] = 0; /* clears the following word so later writes can OR into it */
+//if(n>24) printf("%d %d\n", n, value);
+    index+= n;
+    s->index= index;
+#        endif
+#    endif //!ALIGNED_BITSTREAM_WRITER
+}
+#endif
+static inline void put_sbits(PutBitContext *pb, int n, int32_t value)
+{   /* Writes the n least significant bits of the signed value; n must be in 0..31. */
+    assert(n >= 0 && n <= 31);
+    put_bits(pb, n, value & ((1U<<n)-1)); /* 1U: (1<<31) would overflow a signed int for n == 31 */
+}
+/**
+ * Writes exactly 32 bits into a bitstream, as two 16-bit put_bits() calls.
+ */
+static void av_unused put_bits32(PutBitContext *s, uint32_t value)
+{
+    int lo = value & 0xffff; /* low 16 bits */
+    int hi = value >> 16;    /* high 16 bits */
+#ifdef BITSTREAM_WRITER_LE
+    put_bits(s, 16, lo); /* little-endian writer: low half first */
+    put_bits(s, 16, hi);
+#else
+    put_bits(s, 16, hi); /* default writer: high half first */
+    put_bits(s, 16, lo);
+#endif
+}
+/**
+ * Returns the pointer to the byte where the bitstream writer will put
+ * the next bit (bits still pending in the accumulator are not accounted for in the default writer).
+ */
+static inline uint8_t* put_bits_ptr(PutBitContext *s)
+{
+#ifdef ALT_BITSTREAM_WRITER
+        return s->buf + (s->index>>3); /* index counts bits, so >>3 yields the byte offset */
+#else
+        return s->buf_ptr;
+#endif
+}
+/**
+ * Skips the given number of bytes: the write position advances by n bytes and nothing is written.
+ * PutBitContext must be flushed & aligned to a byte boundary before calling this.
+ */
+static inline void skip_put_bytes(PutBitContext *s, int n)
+{
+        assert((put_bits_count(s)&7)==0);
+#ifdef ALT_BITSTREAM_WRITER
+#error "skip_put_bytes: FIXME may need some cleaning of the buffer with ALT_BITSTREAM_WRITER"
+        s->index += n<<3;
+#else
+        assert(s->bit_left==32); /* accumulator must be empty, i.e. flushed */
+        s->buf_ptr += n;
+#endif
+}
+/**
+ * Skips the given number of bits.
+ * Must only be used if the actual values in the bitstream do not matter.
+ * If n is 0 the behavior is undefined.
+ */
+static inline void skip_put_bits(PutBitContext *s, int n)
+{
+#ifdef ALT_BITSTREAM_WRITER
+    s->index += n;
+#else
+    s->bit_left -= n; /* may go negative when more bits are skipped than are free */
+    s->buf_ptr-= 4*(s->bit_left>>5); /* NOTE(review): presumably relies on an arithmetic shift of a negative bit_left to move buf_ptr forward */
+    s->bit_left &= 31;
+#endif
+}
+/**
+ * Changes the end of the buffer. Only buf_end is updated; size_in_bits keeps its previous value.
+ *
+ * @param size the new size in bytes of the buffer where to put bits
+ */
+static inline void set_put_bits_buffer_size(PutBitContext *s, int size)
+{
+    s->buf_end= s->buf + size;
+}
+#endif /* AVCODEC_PUT_BITS_H */
diff --git a/apps/codecs/libwmapro/wma.c b/apps/codecs/libwmapro/wma.c
new file mode 100644
index 0000000000..5306634c7c
--- /dev/null
+++ b/apps/codecs/libwmapro/wma.c
@@ -0,0 +1,525 @@
+/*
+ * WMA compatible codec
+ * Copyright (c) 2002-2007 The FFmpeg Project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "avcodec.h"
+#include "wma.h"
+//#include "wmadata.h"
+#undef NDEBUG
+#include <assert.h>
+#if 0
+/* XXX: use same run/length optimization as mpeg decoders */
+//FIXME maybe split decode / encode or pass flag
+static void init_coef_vlc(VLC *vlc, uint16_t **prun_table,
+                          float **plevel_table, uint16_t **pint_table,
+                          const CoefVLCTable *vlc_table)
+{   /* Builds the VLC plus the run, level (as float) and int tables for one coefficient table; results are returned through the p*_table pointers. */
+    int n = vlc_table->n;
+    const uint8_t  *table_bits   = vlc_table->huffbits;
+    const uint32_t *table_codes  = vlc_table->huffcodes;
+    const uint16_t *levels_table = vlc_table->levels;
+    uint16_t *run_table, *level_table, *int_table;
+    float *flevel_table;
+    int i, l, j, k, level;
+    init_vlc(vlc, VLCBITS, n, table_bits, 1, 1, table_codes, 4, 4, 0);
+    run_table   = av_malloc(n * sizeof(uint16_t)); /* NOTE(review): none of these allocations is checked */
+    level_table = av_malloc(n * sizeof(uint16_t));
+    flevel_table= av_malloc(n * sizeof(*flevel_table));
+    int_table   = av_malloc(n * sizeof(uint16_t));
+    i = 2; /* entries 0 and 1 are never written; ff_wma_run_level_decode treats codes 0 and 1 as escape and EOB */
+    level = 1;
+    k = 0;
+    while (i < n) {
+        int_table[k] = i; /* index of the first code of this level */
+        l = levels_table[k++]; /* number of run values for the current level */
+        for (j = 0; j < l; j++) {
+            run_table[i]   = j;
+            level_table[i] = level;
+            flevel_table[i]= level;
+            i++;
+        }
+        level++;
+    }
+    *prun_table   = run_table;
+    *plevel_table = flevel_table;
+    *pint_table   = int_table;
+    av_free(level_table); /* the integer levels are scratch only; callers receive flevel_table */
+}
+#endif /* 0 */
+/**
+ *@brief Determine the frame length of a stream as a power of two.
+ *@param sample_rate output sample_rate
+ *@param version wma version
+ *@param decode_flags codec compression features (only bits 1-2 are examined, and only for version 3)
+ *@return log2 of the number of output samples per frame
+ */
+int av_cold ff_wma_get_frame_len_bits(int sample_rate, int version,
+                                      unsigned int decode_flags)
+{
+    /* base size chosen from the sample rate; 13 covers everything above 96000 */
+    int bits = 13;
+    if (sample_rate <= 16000)
+        bits = 9;
+    else if (sample_rate <= 22050 ||
+             (version == 1 && sample_rate <= 32000))
+        bits = 10;
+    else if (sample_rate <= 48000)
+        bits = 11;
+    else if (sample_rate <= 96000)
+        bits = 12;
+    /* only version 3 streams adjust the base size via decode_flags */
+    if (version != 3)
+        return bits;
+    switch (decode_flags & 0x6) {
+    case 0x2:
+        return bits + 1;
+    case 0x4:
+        return bits - 1;
+    case 0x6:
+        return bits - 2;
+    default: /* 0x0: keep the base size */
+        return bits;
+    }
+}
+#if 0
+int ff_wma_init(AVCodecContext *avctx, int flags2)
+{   /* Derives block sizes, band layouts, windows, noise table and coefficient VLC tables from the stream parameters in avctx; returns 0 on success, -1 on invalid parameters. */
+    WMACodecContext *s = avctx->priv_data;
+    int i;
+    float bps1, high_freq;
+    volatile float bps;
+    int sample_rate1;
+    int coef_vlc_table;
+    if (   avctx->sample_rate <= 0 || avctx->sample_rate > 50000
+        || avctx->channels    <= 0 || avctx->channels    > 8
+        || avctx->bit_rate    <= 0)
+        return -1;
+    s->sample_rate = avctx->sample_rate;
+    s->nb_channels = avctx->channels;
+    s->bit_rate    = avctx->bit_rate;
+    s->block_align = avctx->block_align;
+    dsputil_init(&s->dsp, avctx);
+    if (avctx->codec->id == CODEC_ID_WMAV1) {
+        s->version = 1;
+    } else {
+        s->version = 2;
+    }
+    /* compute MDCT block size */
+    s->frame_len_bits = ff_wma_get_frame_len_bits(s->sample_rate, s->version, 0);
+    s->frame_len = 1 << s->frame_len_bits;
+    if (s->use_variable_block_len) {
+        int nb_max, nb;
+        nb = ((flags2 >> 3) & 3) + 1;
+        if ((s->bit_rate / s->nb_channels) >= 32000)
+            nb += 2;
+        nb_max = s->frame_len_bits - BLOCK_MIN_BITS;
+        if (nb > nb_max)
+            nb = nb_max;
+        s->nb_block_sizes = nb + 1;
+    } else {
+        s->nb_block_sizes = 1;
+    }
+    /* init rate dependent parameters */
+    s->use_noise_coding = 1;
+    high_freq = s->sample_rate * 0.5;
+    /* if version 2, then the rates are normalized */
+    sample_rate1 = s->sample_rate;
+    if (s->version == 2) {
+        if (sample_rate1 >= 44100) {
+            sample_rate1 = 44100;
+        } else if (sample_rate1 >= 22050) {
+            sample_rate1 = 22050;
+        } else if (sample_rate1 >= 16000) {
+            sample_rate1 = 16000;
+        } else if (sample_rate1 >= 11025) {
+            sample_rate1 = 11025;
+        } else if (sample_rate1 >= 8000) {
+            sample_rate1 = 8000;
+        }
+    }
+    bps = (float)s->bit_rate / (float)(s->nb_channels * s->sample_rate); /* bits per sample and channel */
+    s->byte_offset_bits = av_log2((int)(bps * s->frame_len / 8.0 + 0.5)) + 2;
+    /* compute high frequency value and choose if noise coding should
+       be activated */
+    bps1 = bps;
+    if (s->nb_channels == 2)
+        bps1 = bps * 1.6;
+    if (sample_rate1 == 44100) {
+        if (bps1 >= 0.61) {
+            s->use_noise_coding = 0;
+        } else {
+            high_freq = high_freq * 0.4;
+        }
+    } else if (sample_rate1 == 22050) {
+        if (bps1 >= 1.16) {
+            s->use_noise_coding = 0;
+        } else if (bps1 >= 0.72) {
+            high_freq = high_freq * 0.7;
+        } else {
+            high_freq = high_freq * 0.6;
+        }
+    } else if (sample_rate1 == 16000) {
+        if (bps > 0.5) {
+            high_freq = high_freq * 0.5;
+        } else {
+            high_freq = high_freq * 0.3;
+        }
+    } else if (sample_rate1 == 11025) {
+        high_freq = high_freq * 0.7;
+    } else if (sample_rate1 == 8000) {
+        if (bps <= 0.625) {
+            high_freq = high_freq * 0.5;
+        } else if (bps > 0.75) {
+            s->use_noise_coding = 0;
+        } else {
+            high_freq = high_freq * 0.65;
+        }
+    } else {
+        if (bps >= 0.8) {
+            high_freq = high_freq * 0.75;
+        } else if (bps >= 0.6) {
+            high_freq = high_freq * 0.6;
+        } else {
+            high_freq = high_freq * 0.5;
+        }
+    }
+    dprintf(s->avctx, "flags2=0x%x\n", flags2);
+    dprintf(s->avctx, "version=%d channels=%d sample_rate=%d bitrate=%d block_align=%d\n",
+            s->version, s->nb_channels, s->sample_rate, s->bit_rate,
+            s->block_align);
+    dprintf(s->avctx, "bps=%f bps1=%f high_freq=%f bitoffset=%d\n",
+            bps, bps1, high_freq, s->byte_offset_bits);
+    dprintf(s->avctx, "use_noise_coding=%d use_exp_vlc=%d nb_block_sizes=%d\n",
+            s->use_noise_coding, s->use_exp_vlc, s->nb_block_sizes);
+    /* compute the scale factor band sizes for each MDCT block size */
+    {
+        int a, b, pos, lpos, k, block_len, i, j, n;
+        const uint8_t *table;
+        if (s->version == 1) {
+            s->coefs_start = 3;
+        } else {
+            s->coefs_start = 0;
+        }
+        for (k = 0; k < s->nb_block_sizes; k++) {
+            block_len = s->frame_len >> k;
+            if (s->version == 1) {
+                lpos = 0;
+                for (i = 0; i < 25; i++) {
+                    a = ff_wma_critical_freqs[i];
+                    b = s->sample_rate;
+                    pos = ((block_len * 2 * a) + (b >> 1)) / b;
+                    if (pos > block_len)
+                        pos = block_len;
+                    s->exponent_bands[0][i] = pos - lpos; /* NOTE(review): writes row 0 for every k in this branch */
+                    if (pos >= block_len) {
+                        i++;
+                        break;
+                    }
+                    lpos = pos;
+                }
+                s->exponent_sizes[0] = i;
+            } else {
+                /* hardcoded tables */
+                table = NULL;
+                a = s->frame_len_bits - BLOCK_MIN_BITS - k;
+                if (a < 3) {
+                    if (s->sample_rate >= 44100) {
+                        table = exponent_band_44100[a];
+                    } else if (s->sample_rate >= 32000) {
+                        table = exponent_band_32000[a];
+                    } else if (s->sample_rate >= 22050) {
+                        table = exponent_band_22050[a];
+                    }
+                }
+                if (table) {
+                    n = *table++; /* first table entry is the number of bands */
+                    for (i = 0; i < n; i++)
+                        s->exponent_bands[k][i] = table[i];
+                    s->exponent_sizes[k] = n;
+                } else {
+                    j = 0;
+                    lpos = 0;
+                    for (i = 0; i < 25; i++) {
+                        a = ff_wma_critical_freqs[i];
+                        b = s->sample_rate;
+                        pos = ((block_len * 2 * a) + (b << 1)) / (4 * b);
+                        pos <<= 2; /* band edges end up as multiples of 4 */
+                        if (pos > block_len)
+                            pos = block_len;
+                        if (pos > lpos)
+                            s->exponent_bands[k][j++] = pos - lpos;
+                        if (pos >= block_len)
+                            break;
+                        lpos = pos;
+                    }
+                    s->exponent_sizes[k] = j;
+                }
+            }
+            /* max number of coefs */
+            s->coefs_end[k] = (s->frame_len - ((s->frame_len * 9) / 100)) >> k;
+            /* high freq computation */
+            s->high_band_start[k] = (int)((block_len * 2 * high_freq) /
+                                          s->sample_rate + 0.5);
+            n = s->exponent_sizes[k];
+            j = 0;
+            pos = 0;
+            for (i = 0; i < n; i++) {
+                int start, end;
+                start = pos;
+                pos += s->exponent_bands[k][i];
+                end = pos;
+                if (start < s->high_band_start[k])
+                    start = s->high_band_start[k];
+                if (end > s->coefs_end[k])
+                    end = s->coefs_end[k];
+                if (end > start) /* keep only the part of the band inside [high_band_start, coefs_end) */
+                    s->exponent_high_bands[k][j++] = end - start;
+            }
+            s->exponent_high_sizes[k] = j;
+#if 0
+            tprintf(s->avctx, "%5d: coefs_end=%d high_band_start=%d nb_high_bands=%d: ",
+                    s->frame_len >> k,
+                    s->coefs_end[k],
+                    s->high_band_start[k],
+                    s->exponent_high_sizes[k]);
+            for (j = 0; j < s->exponent_high_sizes[k]; j++)
+                tprintf(s->avctx, " %d", s->exponent_high_bands[k][j]);
+            tprintf(s->avctx, "\n");
+#endif
+        }
+    }
+#ifdef TRACE
+    {
+        int i, j;
+        for (i = 0; i < s->nb_block_sizes; i++) {
+            tprintf(s->avctx, "%5d: n=%2d:",
+                    s->frame_len >> i,
+                    s->exponent_sizes[i]);
+            for (j = 0; j < s->exponent_sizes[i]; j++)
+                tprintf(s->avctx, " %d", s->exponent_bands[i][j]);
+            tprintf(s->avctx, "\n");
+        }
+    }
+#endif
+    /* init MDCT windows: simple sine window */
+    for (i = 0; i < s->nb_block_sizes; i++) {
+        ff_init_ff_sine_windows(s->frame_len_bits - i);
+        s->windows[i] = ff_sine_windows[s->frame_len_bits - i];
+    }
+    s->reset_block_lengths = 1;
+    if (s->use_noise_coding) {
+        /* init the noise generator */
+        if (s->use_exp_vlc) {
+            s->noise_mult = 0.02;
+        } else {
+            s->noise_mult = 0.04;
+        }
+#ifdef TRACE
+        for (i = 0; i < NOISE_TAB_SIZE; i++)
+            s->noise_table[i] = 1.0 * s->noise_mult;
+#else
+        {
+            unsigned int seed;
+            float norm;
+            seed = 1;
+            norm = (1.0 / (float)(1LL << 31)) * sqrt(3) * s->noise_mult;
+            for (i = 0; i < NOISE_TAB_SIZE; i++) {
+                seed = seed * 314159 + 1; /* fixed seed and multiplier: the table is the same on every run */
+                s->noise_table[i] = (float)((int)seed) * norm;
+            }
+        }
+#endif
+    }
+    /* choose the VLC tables for the coefficients */
+    coef_vlc_table = 2;
+    if (s->sample_rate >= 32000) {
+        if (bps1 < 0.72) {
+            coef_vlc_table = 0;
+        } else if (bps1 < 1.16) {
+            coef_vlc_table = 1;
+        }
+    }
+    s->coef_vlcs[0]= &coef_vlcs[coef_vlc_table * 2    ];
+    s->coef_vlcs[1]= &coef_vlcs[coef_vlc_table * 2 + 1];
+    init_coef_vlc(&s->coef_vlc[0], &s->run_table[0], &s->level_table[0], &s->int_table[0],
+                  s->coef_vlcs[0]);
+    init_coef_vlc(&s->coef_vlc[1], &s->run_table[1], &s->level_table[1], &s->int_table[1],
+                  s->coef_vlcs[1]);
+    return 0;
+}
+int ff_wma_total_gain_to_bits(int total_gain)
+{   /* Map total_gain to a value between 13 and 9 using ascending thresholds. */
+    if (total_gain < 15) return 13;
+    if (total_gain < 32) return 12;
+    if (total_gain < 40) return 11;
+    if (total_gain < 45) return 10;
+    return 9;
+}
+int ff_wma_end(AVCodecContext *avctx)
+{   /* Releases the MDCT contexts, VLC tables and run/level/int tables held by the codec context; always returns 0. */
+    WMACodecContext *s = avctx->priv_data;
+    int i;
+    for (i = 0; i < s->nb_block_sizes; i++)
+        ff_mdct_end(&s->mdct_ctx[i]);
+    if (s->use_exp_vlc) {
+        free_vlc(&s->exp_vlc);
+    }
+    if (s->use_noise_coding) {
+        free_vlc(&s->hgain_vlc);
+    }
+    for (i = 0; i < 2; i++) { /* the two coefficient tables set up through init_coef_vlc() */
+        free_vlc(&s->coef_vlc[i]);
+        av_free(s->run_table[i]);
+        av_free(s->level_table[i]);
+        av_free(s->int_table[i]);
+    }
+    return 0;
+}
+#endif /* 0 */
+/**
+ * Decode an uncompressed coefficient.
+ * @param gb bitstream reader context
+ * @return the decoded coefficient
+ */
+unsigned int ff_wma_get_large_val(GetBitContext* gb)
+{
+    /** consumes up to 34 bits: at most 3 length flags plus at most 31 value bits */
+    int n_bits = 8;
+    /** decode length: each set flag widens the value (8, 16, 24 or 31 bits) */
+    if (get_bits1(gb)) {
+        n_bits += 8;
+        if (get_bits1(gb)) {
+            n_bits += 8;
+            if (get_bits1(gb)) {
+                n_bits += 7;
+            }
+        }
+    }
+    return get_bits_long(gb, n_bits);
+}
+/**
+ * Decode run level compressed coefficients.
+ * @param avctx codec context
+ * @param gb bitstream reader context
+ * @param vlc vlc table for get_vlc2
+ * @param level_table level codes
+ * @param run_table run codes
+ * @param version 0 for wma1,2 1 for wmapro
+ * @param ptr output buffer
+ * @param offset offset in the output buffer
+ * @param num_coefs number of input coefficients
+ * @param block_len input buffer length (2^n)
+ * @param frame_len_bits number of bits for escaped run codes
+ * @param coef_nb_bits number of bits for escaped level codes
+ * @return 0 on success, -1 otherwise
+ */
+int ff_wma_run_level_decode(AVCodecContext* avctx, GetBitContext* gb,
+                            VLC *vlc,
+                            const float *level_table, const uint16_t *run_table,
+                            int version, WMACoef *ptr, int offset,
+                            int num_coefs, int block_len, int frame_len_bits,
+                            int coef_nb_bits)
+{
+    int code, level, sign;
+    const uint32_t *ilvl = (const uint32_t*)level_table; /* float levels viewed as raw 32-bit words */
+    uint32_t *iptr = (uint32_t*)ptr;
+    const unsigned int coef_mask = block_len - 1; /* block_len is a power of two, so this wraps indices into the buffer */
+    for (; offset < num_coefs; offset++) {
+        code = get_vlc2(gb, vlc->table, VLCBITS, VLCMAX);
+        if (code > 1) {
+            /** normal code */
+            offset += run_table[code];
+            sign = get_bits1(gb) - 1; /* 0 when the bit is set, -1 otherwise */
+            iptr[offset & coef_mask] = ilvl[code] ^ (sign & 0x80000000); /* flips the top (sign) bit when sign == -1, without the undefined (-1) << 31 */
+        } else if (code == 1) {
+            /** EOB */
+            break;
+        } else {
+            /** escape */
+            if (!version) {
+                level = get_bits(gb, coef_nb_bits);
+                /** NOTE: this is rather suboptimal. reading
+                    block_len_bits would be better */
+                offset += get_bits(gb, frame_len_bits);
+            } else {
+                level = ff_wma_get_large_val(gb);
+                /** escape decode */
+                if (get_bits1(gb)) {
+                    if (get_bits1(gb)) {
+                        if (get_bits1(gb)) {
+                            av_log(avctx,AV_LOG_ERROR,
+                                "broken escape sequence\n");
+                            return -1;
+                        } else
+                            offset += get_bits(gb, frame_len_bits) + 4;
+                    } else
+                        offset += get_bits(gb, 2) + 1;
+                }
+            }
+            sign = get_bits1(gb) - 1;
+            ptr[offset & coef_mask] = (level^sign) - sign; /* negates level when sign == -1 */
+        }
+    }
+    /** NOTE: EOB can be omitted */
+    if (offset > num_coefs) {
+        av_log(avctx, AV_LOG_ERROR, "overflow in spectral RLE, ignoring\n");
+        return -1;
+    }
+    return 0;
+}
diff --git a/apps/codecs/libwmapro/wma.h b/apps/codecs/libwmapro/wma.h
new file mode 100644
index 0000000000..11274ad970
--- /dev/null
+++ b/apps/codecs/libwmapro/wma.h
@@ -0,0 +1,163 @@
+/*
+ * WMA compatible codec
+ * Copyright (c) 2002-2007 The FFmpeg Project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef AVCODEC_WMA_H
+#define AVCODEC_WMA_H
+#include "get_bits.h"
+#include "put_bits.h"
+#include "dsputil.h"
+#include "fft.h"
+/* size of blocks, expressed as log2 of the block length */
+#define BLOCK_MIN_BITS 7
+#define BLOCK_MAX_BITS 11
+#define BLOCK_MAX_SIZE (1 << BLOCK_MAX_BITS) /* = 2048 */
+#define BLOCK_NB_SIZES (BLOCK_MAX_BITS - BLOCK_MIN_BITS + 1) /* = 5 */
+/* XXX: find exact max size */
+#define HIGH_BAND_MAX_SIZE 16
+#define NB_LSP_COEFS 10
+/* XXX: is it a suitable value ? */
+#define MAX_CODED_SUPERFRAME_SIZE 16384
+#define MAX_CHANNELS 2
+#define NOISE_TAB_SIZE 8192
+#define LSP_POW_BITS 7
+//FIXME should be in wmadec
+#define VLCBITS 9
+#define VLCMAX ((22+VLCBITS-1)/VLCBITS) /* = 3 with VLCBITS == 9 */
+typedef float WMACoef;          ///< type for decoded coefficients, int16_t would be enough for wma 1/2
+typedef struct CoefVLCTable { /* static description of one coefficient VLC table, consumed by init_coef_vlc() */
+    int n;                      ///< total number of codes
+    int max_level;
+    const uint32_t *huffcodes;  ///< VLC bit values
+    const uint8_t *huffbits;    ///< VLC bit size
+    const uint16_t *levels;     ///< table to build run/level tables: levels[k] is the number of runs for level k + 1
+} CoefVLCTable;
+typedef struct WMACodecContext { /* state shared by the WMA v1/v2 codec code */
+    AVCodecContext* avctx;
+    GetBitContext gb;
+    PutBitContext pb;
+    int sample_rate;
+    int nb_channels;
+    int bit_rate;
+    int version;                            ///< 1 = 0x160 (WMAV1), 2 = 0x161 (WMAV2)
+    int block_align;
+    int use_bit_reservoir;
+    int use_variable_block_len;
+    int use_exp_vlc;                        ///< exponent coding: 0 = lsp, 1 = vlc + delta
+    int use_noise_coding;                   ///< true if perceptual noise is added
+    int byte_offset_bits;
+    VLC exp_vlc;
+    int exponent_sizes[BLOCK_NB_SIZES];     ///< number of entries used in exponent_bands[k]
+    uint16_t exponent_bands[BLOCK_NB_SIZES][25];
+    int high_band_start[BLOCK_NB_SIZES];    ///< index of first coef in high band
+    int coefs_start;                        ///< first coded coef
+    int coefs_end[BLOCK_NB_SIZES];          ///< max number of coded coefficients
+    int exponent_high_sizes[BLOCK_NB_SIZES]; ///< number of entries used in exponent_high_bands[k]
+    int exponent_high_bands[BLOCK_NB_SIZES][HIGH_BAND_MAX_SIZE];
+    VLC hgain_vlc;
+    /* coded values in high bands */
+    int high_band_coded[MAX_CHANNELS][HIGH_BAND_MAX_SIZE];
+    int high_band_values[MAX_CHANNELS][HIGH_BAND_MAX_SIZE];
+    /* there are two possible tables for spectral coefficients */
+//FIXME the following 3 tables should be shared between decoders
+    VLC coef_vlc[2];
+    uint16_t *run_table[2];
+    float *level_table[2];
+    uint16_t *int_table[2];
+    const CoefVLCTable *coef_vlcs[2];
+    /* frame info */
+    int frame_len;                          ///< frame length in samples
+    int frame_len_bits;                     ///< frame_len = 1 << frame_len_bits
+    int nb_block_sizes;                     ///< number of block sizes
+    /* block info */
+    int reset_block_lengths;
+    int block_len_bits;                     ///< log2 of current block length
+    int next_block_len_bits;                ///< log2 of next block length
+    int prev_block_len_bits;                ///< log2 of prev block length
+    int block_len;                          ///< block length in samples
+    int block_num;                          ///< block number in current frame
+    int block_pos;                          ///< current position in frame
+    uint8_t ms_stereo;                      ///< true if mid/side stereo mode
+    uint8_t channel_coded[MAX_CHANNELS];    ///< true if channel is coded
+    int exponents_bsize[MAX_CHANNELS];      ///< log2 ratio frame/exp. length
+    DECLARE_ALIGNED(16, float, exponents)[MAX_CHANNELS][BLOCK_MAX_SIZE];
+    float max_exponent[MAX_CHANNELS];
+    WMACoef coefs1[MAX_CHANNELS][BLOCK_MAX_SIZE];
+    DECLARE_ALIGNED(16, float, coefs)[MAX_CHANNELS][BLOCK_MAX_SIZE];
+    DECLARE_ALIGNED(16, FFTSample, output)[BLOCK_MAX_SIZE * 2];
+    FFTContext mdct_ctx[BLOCK_NB_SIZES];
+    float *windows[BLOCK_NB_SIZES];         ///< one sine window per block size, taken from ff_sine_windows
+    /* output buffer for one frame and the last for IMDCT windowing */
+    DECLARE_ALIGNED(16, float, frame_out)[MAX_CHANNELS][BLOCK_MAX_SIZE * 2];
+    /* last frame info */
+    uint8_t last_superframe[MAX_CODED_SUPERFRAME_SIZE + 4]; /* padding added */
+    int last_bitoffset;
+    int last_superframe_len;
+    float noise_table[NOISE_TAB_SIZE];
+    int noise_index;
+    float noise_mult; /* XXX: suppress that and integrate it in the noise array */
+    /* lsp_to_curve tables */
+    float lsp_cos_table[BLOCK_MAX_SIZE];
+    float lsp_pow_e_table[256];
+    float lsp_pow_m_table1[(1 << LSP_POW_BITS)];
+    float lsp_pow_m_table2[(1 << LSP_POW_BITS)];
+    DSPContext dsp;
+#ifdef TRACE
+    int frame_count;
+#endif
+} WMACodecContext;
+extern const uint16_t ff_wma_critical_freqs[25];
+extern const uint16_t ff_wma_hgain_huffcodes[37];
+extern const uint8_t ff_wma_hgain_huffbits[37];
+extern const float ff_wma_lsp_codebook[NB_LSP_COEFS][16];
+extern const uint32_t ff_aac_scalefactor_code[121];
+extern const uint8_t  ff_aac_scalefactor_bits[121];
+int av_cold ff_wma_get_frame_len_bits(int sample_rate, int version,
+                                      unsigned int decode_flags);
+int ff_wma_init(AVCodecContext * avctx, int flags2);
+int ff_wma_total_gain_to_bits(int total_gain);
+int ff_wma_end(AVCodecContext *avctx);
+unsigned int ff_wma_get_large_val(GetBitContext* gb);
+int ff_wma_run_level_decode(AVCodecContext* avctx, GetBitContext* gb,
+                            VLC *vlc,
+                            const float *level_table, const uint16_t *run_table,
+                            int version, WMACoef *ptr, int offset,
+                            int num_coefs, int block_len, int frame_len_bits,
+                            int coef_nb_bits);
+#endif /* AVCODEC_WMA_H */
diff --git a/apps/codecs/libwmapro/wmaprodata.h b/apps/codecs/libwmapro/wmaprodata.h
new file mode 100644
index 0000000000..a1d186e0c2
--- /dev/null
+++ b/apps/codecs/libwmapro/wmaprodata.h
@@ -0,0 +1,604 @@
+/*
+ * WMA 9/3/PRO compatible decoder
+ * Copyright (c) 2007 Baptiste Coudurier, Benjamin Larsson, Ulion
+ * Copyright (c) 2008 - 2009 Sascha Sommer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/**
+ * @file
+ * @brief tables for wmapro decoding
+ */
+#ifndef AVCODEC_WMAPRODATA_H
+#define AVCODEC_WMAPRODATA_H
+#include <stddef.h>
+#include <stdint.h>
+/**
+ * @brief frequencies to divide the frequency spectrum into scale factor bands
+ */
+static const uint16_t critical_freq[] = { /* 28 strictly ascending values; unit presumably Hz (TODO confirm) */
+     100,   200,    300,    400,    510,    630,    770,
+     920,  1080,   1270,   1480,   1720,   2000,   2320,
+    2700,  3150,   3700,   4400,   5300,   6400,   7700,
+    9500, 12000,  15500,  20675,  28575,  41375,  63875,
+};
+/**
+ * @name Huffman tables for DPCM-coded scale factors
+ * @{
+ */
+#define HUFF_SCALE_SIZE    121
+#define HUFF_SCALE_MAXBITS  19
+static const uint16_t scale_huffcodes[HUFF_SCALE_SIZE] = {
+    0xE639, 0xE6C2, 0xE6C1, 0xE6C0, 0xE63F, 0xE63E, 0xE63D, 0xE63C,
+    0xE63B, 0xE63A, 0xE638, 0xE637, 0xE636, 0xE635, 0xE634, 0xE632,
+    0xE633, 0xE620, 0x737B, 0xE610, 0xE611, 0xE612, 0xE613, 0xE614,
+    0xE615, 0xE616, 0xE617, 0xE618, 0xE619, 0xE61A, 0xE61B, 0xE61C,
+    0xE61D, 0xE61E, 0xE61F, 0xE6C3, 0xE621, 0xE622, 0xE623, 0xE624,
+    0xE625, 0xE626, 0xE627, 0xE628, 0xE629, 0xE62A, 0xE62B, 0xE62C,
+    0xE62D, 0xE62E, 0xE62F, 0xE630, 0xE631, 0x1CDF, 0x0E60, 0x0399,
+    0x00E7, 0x001D, 0x0000, 0x0001, 0x0001, 0x0001, 0x0002, 0x0006,
+    0x0002, 0x0007, 0x0006, 0x000F, 0x0038, 0x0072, 0x039A, 0xE6C4,
+    0xE6C5, 0xE6C6, 0xE6C7, 0xE6C8, 0xE6C9, 0xE6CA, 0xE6CB, 0xE6CC,
+    0xE6CD, 0xE6CE, 0xE6CF, 0xE6D0, 0xE6D1, 0xE6D2, 0xE6D3, 0xE6D4,
+    0xE6D5, 0xE6D6, 0xE6D7, 0xE6D8, 0xE6D9, 0xE6DA, 0xE6DB, 0xE6DC,
+    0xE6DD, 0xE6DE, 0xE6DF, 0xE6E0, 0xE6E1, 0xE6E2, 0xE6E3, 0xE6E4,
+    0xE6E5, 0xE6E6, 0xE6E7, 0xE6E8, 0xE6E9, 0xE6EA, 0xE6EB, 0xE6EC,
+    0xE6ED, 0xE6EE, 0xE6EF, 0xE6F0, 0xE6F1, 0xE6F2, 0xE6F3, 0xE6F4,
+    0xE6F5,
+};
+static const uint8_t scale_huffbits[HUFF_SCALE_SIZE] = {
+    19, 19, 19, 19, 19, 19, 19, 19,
+    19, 19, 19, 19, 19, 19, 19, 19,
+    19, 19, 18, 19, 19, 19, 19, 19,
+    19, 19, 19, 19, 19, 19, 19, 19,
+    19, 19, 19, 19, 19, 19, 19, 19,
+    19, 19, 19, 19, 19, 19, 19, 19,
+    19, 19, 19, 19, 19, 16, 15, 13,
+    11,  8,  5,  2,  1,  3,  5,  6,
+     6,  7,  7,  7,  9, 10, 13, 19,
+    19, 19, 19, 19, 19, 19, 19, 19,
+    19, 19, 19, 19, 19, 19, 19, 19,
+    19, 19, 19, 19, 19, 19, 19, 19,
+    19, 19, 19, 19, 19, 19, 19, 19,
+    19, 19, 19, 19, 19, 19, 19, 19,
+    19, 19, 19, 19, 19, 19, 19, 19,
+    19,
+};
+/** @} */
+/**
+ * @name Huffman, run and level tables for runlevel-coded scale factors
+ * @{
+ */
+#define HUFF_SCALE_RL_SIZE    120
+#define HUFF_SCALE_RL_MAXBITS  21
+static const uint32_t scale_rl_huffcodes[HUFF_SCALE_RL_SIZE] = {
+    0x00010C, 0x000001, 0x10FE2A, 0x000003, 0x000003, 0x000001, 0x000013,
+    0x000020, 0x000029, 0x000014, 0x000016, 0x000045, 0x000049, 0x00002F,
+    0x000042, 0x00008E, 0x00008F, 0x000129, 0x000009, 0x00000D, 0x0004AC,
+    0x00002C, 0x000561, 0x0002E6, 0x00087C, 0x0002E2, 0x00095C, 0x000018,
+    0x000001, 0x000016, 0x000044, 0x00002A, 0x000007, 0x000159, 0x000143,
+    0x000128, 0x00015A, 0x00012D, 0x00002B, 0x0000A0, 0x000142, 0x00012A,
+    0x0002EF, 0x0004AF, 0x00087D, 0x004AE9, 0x0043F9, 0x000067, 0x000199,
+    0x002B05, 0x001583, 0x0021FE, 0x10FE2C, 0x000004, 0x00002E, 0x00010D,
+    0x00000A, 0x000244, 0x000017, 0x000245, 0x000011, 0x00010E, 0x00012C,
+    0x00002A, 0x00002F, 0x000121, 0x000046, 0x00087E, 0x0000BA, 0x000032,
+    0x0087F0, 0x0056DC, 0x0002EC, 0x0043FA, 0x002B6F, 0x004AE8, 0x0002B7,
+    0x10FE2B, 0x000001, 0x000051, 0x000010, 0x0002EE, 0x000B9C, 0x002576,
+    0x000198, 0x0056DD, 0x0000CD, 0x000AC0, 0x000170, 0x004AEF, 0x00002D,
+    0x0004AD, 0x0021FF, 0x0005CF, 0x002B04, 0x10FE29, 0x10FE28, 0x0002ED,
+    0x002E74, 0x021FC4, 0x004AEE, 0x010FE3, 0x087F17, 0x000000, 0x000097,
+    0x0002E3, 0x000ADA, 0x002575, 0x00173B, 0x0043FB, 0x002E75, 0x10FE2D,
+    0x0015B6, 0x00056C, 0x000057, 0x000123, 0x000120, 0x00021E, 0x000172,
+    0x0002B1,
+};
+static const uint8_t scale_rl_huffbits[HUFF_SCALE_RL_SIZE] = {
+     9,  2, 21,  2,  4,  5,  5,
+     6,  6,  7,  7,  7,  7,  6,
+     7,  8,  8,  9, 10, 10, 11,
+    12, 11, 12, 12, 12, 12, 11,
+     4,  5,  7,  8,  9,  9,  9,
+     9,  9,  9,  8,  8,  9,  9,
+    12, 11, 12, 15, 15, 13, 15,
+    14, 13, 14, 21,  5,  6,  9,
+    10, 10, 11, 10, 11,  9,  9,
+     6,  8,  9,  7, 12, 10, 12,
+    16, 15, 12, 15, 14, 15, 10,
+    21,  6,  7, 11, 12, 14, 14,
+    15, 15, 14, 12, 11, 15, 12,
+    11, 14, 13, 14, 21, 21, 12,
+    16, 18, 15, 17, 20,  7,  8,
+    12, 12, 14, 15, 15, 16, 21,
+    13, 11,  7,  9,  9, 10, 11,
+    10,
+};
+static const uint8_t scale_rl_run[HUFF_SCALE_RL_SIZE] = {
+     0,  0,  0,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+    16, 17, 18, 19, 20, 21, 22, 23, 24,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
+    10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,  0,  1,  2,  3,
+     4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
+    23, 24,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
+    17, 18, 19, 20, 21, 22, 23, 24,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10,
+     0,  1,  0,  1,  0,  1,
+};
+static const uint8_t scale_rl_level[HUFF_SCALE_RL_SIZE] = {
+     0,  0,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+     2,  2,  2,  2,  2,  2,  2,  2,  2,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+     3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  4,  4,  4,  4,
+     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+     4,  4,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+     5,  5,  5,  5,  5,  5,  5,  5,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
+     7,  7,  8,  8,  9,  9,
+};
+/** @} */
+/**
+ * @name Huffman, run and level codes for runlevel-coded coefficients
+ * @{
+ */
+#define HUFF_COEF0_SIZE    272
+#define HUFF_COEF0_MAXBITS  21
+static const uint32_t coef0_huffcodes[HUFF_COEF0_SIZE] = {
+    0x00004A, 0x00002B, 0x000000, 0x000003, 0x000006, 0x000009, 0x00000F,
+    0x000010, 0x000016, 0x000011, 0x000016, 0x000028, 0x00002F, 0x000026,
+    0x000029, 0x000045, 0x000055, 0x00005D, 0x000042, 0x00004E, 0x000051,
+    0x00005E, 0x00008D, 0x0000A8, 0x0000AD, 0x000080, 0x000096, 0x00009F,
+    0x0000AA, 0x0000BE, 0x00011C, 0x000153, 0x000158, 0x000170, 0x000104,
+    0x00010D, 0x000105, 0x000103, 0x00012F, 0x000177, 0x000175, 0x000157,
+    0x000174, 0x000225, 0x00023B, 0x00020D, 0x00021F, 0x000281, 0x00027B,
+    0x000282, 0x0002AC, 0x0002FD, 0x00044F, 0x000478, 0x00044D, 0x0002EC,
+    0x00044E, 0x000564, 0x000409, 0x00040B, 0x000501, 0x000545, 0x0004F3,
+    0x000541, 0x00043B, 0x0004F1, 0x0004F4, 0x0008FD, 0x000A94, 0x000811,
+    0x000B88, 0x000B91, 0x000B93, 0x0008EA, 0x000899, 0x000B8A, 0x000972,
+    0x0009E5, 0x000A8F, 0x000A84, 0x000A8E, 0x000A00, 0x000830, 0x0008E8,
+    0x000B95, 0x000871, 0x00083A, 0x000814, 0x000873, 0x000BFE, 0x001728,
+    0x001595, 0x001712, 0x00102A, 0x001021, 0x001729, 0x00152E, 0x0013C3,
+    0x001721, 0x001597, 0x00151B, 0x0010F2, 0x001403, 0x001703, 0x001503,
+    0x001708, 0x0013C1, 0x00170E, 0x00170C, 0x0010E1, 0x0011EA, 0x001020,
+    0x001500, 0x0017FA, 0x001704, 0x001705, 0x0017F0, 0x0017FB, 0x0021E6,
+    0x002B2D, 0x0020C6, 0x002B29, 0x002E4A, 0x0023AC, 0x001519, 0x0023F3,
+    0x002B2C, 0x0021C0, 0x0017FE, 0x0023D7, 0x0017F9, 0x0012E7, 0x0013C0,
+    0x002261, 0x0023D3, 0x002057, 0x002056, 0x0021D2, 0x0020C7, 0x0023D2,
+    0x0020EC, 0x0044C0, 0x002FE2, 0x00475B, 0x002A03, 0x002FE3, 0x0021E2,
+    0x0021D0, 0x002A31, 0x002E13, 0x002E05, 0x0047E5, 0x00000E, 0x000024,
+    0x000088, 0x0000B9, 0x00010C, 0x000224, 0x0002B3, 0x000283, 0x0002ED,
+    0x00047B, 0x00041E, 0x00043D, 0x0004F5, 0x0005FD, 0x000A92, 0x000B96,
+    0x000838, 0x000971, 0x000B83, 0x000B80, 0x000BF9, 0x0011D3, 0x0011E8,
+    0x0011D7, 0x001527, 0x0011F8, 0x001073, 0x0010F0, 0x0010E4, 0x0017F8,
+    0x001062, 0x001402, 0x0017E3, 0x00151A, 0x001077, 0x00152B, 0x00170D,
+    0x0021D3, 0x002E41, 0x0013C2, 0x000029, 0x0000A9, 0x00025D, 0x000419,
+    0x000544, 0x000B8B, 0x0009E4, 0x0011D2, 0x001526, 0x001724, 0x0012E6,
+    0x00150B, 0x0017FF, 0x002E26, 0x002E4B, 0x002B28, 0x0021E3, 0x002A14,
+    0x00475A, 0x002E12, 0x000057, 0x00023E, 0x000A90, 0x000BF0, 0x001072,
+    0x001502, 0x0023D6, 0x0020ED, 0x002A30, 0x0044C7, 0x00008C, 0x00047F,
+    0x00152A, 0x002262, 0x002E04, 0x0000A1, 0x0005F9, 0x000173, 0x000875,
+    0x000171, 0x00152D, 0x0002E3, 0x0017E2, 0x0002AD, 0x0021C1, 0x000479,
+    0x0021E7, 0x00041F, 0x005C4E, 0x000543, 0x005C4F, 0x000A91, 0x00898D,
+    0x000B97, 0x008746, 0x000970, 0x008745, 0x000B85, 0x00A856, 0x00152F,
+    0x010E8E, 0x0010E5, 0x00A857, 0x00170F, 0x021D11, 0x002A58, 0x010E8F,
+    0x002E40, 0x021D13, 0x002A59, 0x043A25, 0x002A02, 0x043A21, 0x0044C1,
+    0x087448, 0x0047E4, 0x043A20, 0x00542A, 0x087449, 0x00898C,
+};
+static const uint8_t coef0_huffbits[HUFF_COEF0_SIZE] = {
+     8,  7,  2,  3,  3,  4,  4,
+     5,  5,  6,  6,  6,  6,  7,
+     7,  7,  7,  7,  8,  8,  8,
+     8,  8,  8,  8,  9,  9,  9,
+     9,  9,  9,  9,  9,  9, 10,
+    10, 10, 10, 10, 10, 10, 10,
+    10, 10, 10, 11, 11, 11, 11,
+    11, 11, 11, 11, 11, 11, 11,
+    11, 11, 12, 12, 12, 12, 12,
+    12, 12, 12, 12, 12, 12, 13,
+    12, 12, 12, 12, 12, 12, 13,
+    13, 13, 13, 13, 13, 13, 12,
+    12, 13, 13, 13, 13, 13, 13,
+    13, 13, 14, 14, 13, 13, 14,
+    13, 13, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 13, 14,
+    14, 14, 14, 14, 14, 14, 15,
+    14, 15, 14, 14, 14, 14, 14,
+    14, 15, 14, 14, 14, 14, 14,
+    14, 14, 15, 15, 15, 15, 14,
+    15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15,  4,  7,
+     8,  9, 10, 10, 10, 11, 11,
+    11, 12, 12, 12, 12, 12, 12,
+    13, 13, 13, 13, 13, 13, 13,
+    13, 13, 13, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 13, 14,
+    15, 14, 14,  6,  9, 11, 12,
+    12, 12, 13, 13, 13, 13, 14,
+    14, 14, 14, 14, 14, 15, 15,
+    15, 15,  7, 10, 12, 13, 14,
+    14, 14, 15, 15, 15,  8, 11,
+    13, 14, 15,  9, 12,  9, 13,
+    10, 13, 10, 14, 11, 15, 11,
+    15, 12, 15, 12, 15, 12, 16,
+    12, 17, 13, 17, 13, 17, 13,
+    18, 14, 17, 14, 19, 14, 18,
+    14, 19, 14, 20, 15, 20, 15,
+    21, 15, 20, 16, 21, 16,
+};
+#define HUFF_COEF1_SIZE    244
+#define HUFF_COEF1_MAXBITS  22
+static const uint32_t coef1_huffcodes[HUFF_COEF1_SIZE] = {
+    0x0001E2, 0x00007F, 0x000000, 0x000002, 0x000008, 0x00000E, 0x000019,
+    0x00002F, 0x000037, 0x000060, 0x00006C, 0x000095, 0x0000C6, 0x0000F0,
+    0x00012E, 0x000189, 0x0001A5, 0x0001F8, 0x000253, 0x00030A, 0x000344,
+    0x00034D, 0x0003F2, 0x0004BD, 0x0005D7, 0x00062A, 0x00068B, 0x000693,
+    0x000797, 0x00097D, 0x000BAB, 0x000C52, 0x000C5E, 0x000D21, 0x000D20,
+    0x000F1A, 0x000FCE, 0x000FD1, 0x0012F1, 0x001759, 0x0018AC, 0x0018A7,
+    0x0018BF, 0x001A2B, 0x001E52, 0x001E50, 0x001E31, 0x001FB8, 0x0025E6,
+    0x0025E7, 0x002EB4, 0x002EB7, 0x003169, 0x00315B, 0x00317C, 0x00316C,
+    0x0034CA, 0x00348D, 0x003F40, 0x003CA2, 0x003F76, 0x004BC3, 0x004BE5,
+    0x003F73, 0x004BF8, 0x004BF9, 0x006131, 0x00628B, 0x006289, 0x0062DA,
+    0x00628A, 0x0062D4, 0x006997, 0x0062B4, 0x006918, 0x00794D, 0x007E7B,
+    0x007E87, 0x007EEA, 0x00794E, 0x00699D, 0x007967, 0x00699F, 0x0062DB,
+    0x007E7A, 0x007EEB, 0x00BAC0, 0x0097C9, 0x00C537, 0x00C5AB, 0x00D233,
+    0x00D338, 0x00BAC1, 0x00D23D, 0x012F91, 0x00D339, 0x00FDC8, 0x00D23C,
+    0x00FDDC, 0x00FDC9, 0x00FDDD, 0x00D33C, 0x000003, 0x000016, 0x00003E,
+    0x0000C3, 0x0001A1, 0x000347, 0x00062E, 0x000BAA, 0x000F2D, 0x001A2A,
+    0x001E58, 0x00309B, 0x003CA3, 0x005D6A, 0x00629A, 0x006996, 0x00794F,
+    0x007EE5, 0x00BAD7, 0x00C5AA, 0x00C5F4, 0x00FDDF, 0x00FDDE, 0x018A20,
+    0x018A6D, 0x01A67B, 0x01A464, 0x025F21, 0x01F9E2, 0x01F9E3, 0x00000A,
+    0x00003D, 0x000128, 0x0003C7, 0x000C24, 0x0018A3, 0x002EB1, 0x003CB2,
+    0x00691F, 0x007E79, 0x000013, 0x0000BB, 0x00034E, 0x000D14, 0x0025FD,
+    0x004BE7, 0x000024, 0x000188, 0x0007EF, 0x000035, 0x000308, 0x0012F2,
+    0x00005C, 0x0003F6, 0x0025E0, 0x00006D, 0x000698, 0x000096, 0x000C25,
+    0x0000C7, 0x000F1B, 0x0000F3, 0x0012FF, 0x000174, 0x001A66, 0x0001A0,
+    0x003099, 0x0001E4, 0x00316B, 0x000252, 0x003F31, 0x00030B, 0x004BE6,
+    0x000346, 0x0062FB, 0x00034F, 0x007966, 0x0003F5, 0x007E86, 0x0005D4,
+    0x00C511, 0x00062C, 0x00C5F5, 0x000692, 0x00F299, 0x000795, 0x00F298,
+    0x0007E9, 0x018A21, 0x00097E, 0x0175AD, 0x000C27, 0x01A67A, 0x000C57,
+    0x02EB59, 0x000D22, 0x0314D9, 0x000F19, 0x03F3C2, 0x000FCD, 0x0348CB,
+    0x0012F8, 0x04BE41, 0x0018A0, 0x03F3C1, 0x0018A1, 0x04BE40, 0x0018B7,
+    0x0629B0, 0x001A64, 0x0D2329, 0x001E30, 0x03F3C3, 0x001F9F, 0x0BAD62,
+    0x001F99, 0x0FCF00, 0x00309A, 0x0629B1, 0x002EB6, 0x175AC3, 0x00314C,
+    0x069195, 0x003168, 0x0BAD63, 0x00348E, 0x175AC1, 0x003F30, 0x07E781,
+    0x003F41, 0x0D2328, 0x003F42, 0x1F9E03, 0x004BC2, 0x175AC2, 0x003F74,
+    0x175AC0, 0x005D61, 0x3F3C05, 0x006130, 0x3F3C04, 0x0062B5,
+};
+static const uint8_t coef1_huffbits[HUFF_COEF1_SIZE] = {
+     9,  7,  2,  3,  4,  4,  5,
+     6,  6,  7,  7,  8,  8,  8,
+     9,  9,  9,  9, 10, 10, 10,
+    10, 10, 11, 11, 11, 11, 11,
+    11, 12, 12, 12, 12, 12, 12,
+    12, 12, 12, 13, 13, 13, 13,
+    13, 13, 13, 13, 13, 13, 14,
+    14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 15, 15,
+    14, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15,
+    15, 15, 16, 16, 16, 16, 16,
+    16, 16, 16, 17, 16, 16, 16,
+    16, 16, 16, 16,  3,  5,  6,
+     8,  9, 10, 11, 12, 12, 13,
+    13, 14, 14, 15, 15, 15, 15,
+    15, 16, 16, 16, 16, 16, 17,
+    17, 17, 17, 18, 17, 17,  4,
+     6,  9, 10, 12, 13, 14, 14,
+    15, 15,  5,  8, 10, 12, 14,
+    15,  6,  9, 11,  6, 10, 13,
+     7, 10, 14,  7, 11,  8, 12,
+     8, 12,  8, 13,  9, 13,  9,
+    14,  9, 14, 10, 14, 10, 15,
+    10, 15, 10, 15, 10, 15, 11,
+    16, 11, 16, 11, 16, 11, 16,
+    11, 17, 12, 17, 12, 17, 12,
+    18, 12, 18, 12, 18, 12, 18,
+    13, 19, 13, 18, 13, 19, 13,
+    19, 13, 20, 13, 18, 13, 20,
+    13, 20, 14, 19, 14, 21, 14,
+    19, 14, 20, 14, 21, 14, 19,
+    14, 20, 14, 21, 15, 21, 14,
+    21, 15, 22, 15, 22, 15,
+};
+static const uint16_t coef0_run[HUFF_COEF0_SIZE] = {
+      0,   0,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,
+     12,  13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
+     26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,
+     40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,  53,
+     54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,  66,  67,
+     68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,  81,
+     82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,
+     96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
+    110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123,
+    124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137,
+    138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,   0,   1,
+      2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
+     16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
+     30,  31,  32,  33,  34,  35,  36,  37,  38,  39,   0,   1,   2,   3,
+      4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,
+     18,  19,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,   0,   1,
+      2,   3,   4,   0,   1,   0,   1,   0,   1,   0,   1,   0,   1,   0,
+      1,   0,   1,   0,   1,   0,   1,   0,   1,   0,   1,   0,   1,   0,
+      1,   0,   1,   0,   1,   0,   1,   0,   1,   0,   1,   0,   1,   0,
+      1,   0,   1,   0,   1,   0,
+};
+static const float coef0_level[HUFF_COEF0_SIZE] = {
+      0,   0,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
+      1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
+      1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
+      1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
+      1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
+      1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
+      1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
+      1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
+      1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
+      1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
+      1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   2,   2,
+      2,   2,   2,   2,   2,   2,   2,   2,   2,   2,   2,   2,   2,   2,
+      2,   2,   2,   2,   2,   2,   2,   2,   2,   2,   2,   2,   2,   2,
+      2,   2,   2,   2,   2,   2,   2,   2,   2,   2,   3,   3,   3,   3,
+      3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,
+      3,   3,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   5,   5,
+      5,   5,   5,   6,   6,   7,   7,   8,   8,   9,   9,  10,  10,  11,
+     11,  12,  12,  13,  13,  14,  14,  15,  15,  16,  16,  17,  17,  18,
+     18,  19,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  24,  25,
+     25,  26,  26,  27,  27,  28,
+};
+static const uint16_t coef1_run[HUFF_COEF1_SIZE] = {
+     0,  0,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
+    34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
+    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
+    70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87,
+    88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99,  0,  1,  2,  3,  4,  5,
+     6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
+    24, 25, 26, 27, 28, 29,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  0,  1,
+     2,  3,  4,  5,  0,  1,  2,  0,  1,  2,  0,  1,  2,  0,  1,  0,  1,  0,
+     1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,
+     1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,
+     1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,
+     1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,
+     1,  0,  1,  0,  1,  0,  1,  0,  0,  0,
+};
+static const float coef1_level[HUFF_COEF1_SIZE] = {
+     0,  0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  2,
+     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+     2,  2,  2,  2,  2,  2,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  4,  4,
+     4,  4,  4,  4,  5,  5,  5,  6,  6,  6,  7,  7,  7,  8,  8,  9,  9, 10,
+    10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19,
+    19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28,
+    28, 29, 29, 30, 30, 31, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37,
+    37, 38, 38, 39, 39, 40, 40, 41, 41, 42, 42, 43, 43, 44, 44, 45, 45, 46,
+    46, 47, 47, 48, 48, 49, 49, 50, 51, 52,
+};
+/** @} */
+/**
+ * @name Huffman and vector lookup tables for vector-coded coefficients
+ * @{
+ */
+#define HUFF_VEC4_SIZE    127
+#define HUFF_VEC4_MAXBITS  14
+static const uint16_t vec4_huffcodes[HUFF_VEC4_SIZE] = {
+    0x0019, 0x0027, 0x00F2, 0x03BA, 0x0930, 0x1267, 0x0031, 0x0030,
+    0x0097, 0x0221, 0x058B, 0x0124, 0x00EB, 0x01D4, 0x03D8, 0x0584,
+    0x0364, 0x045F, 0x0F66, 0x0931, 0x24CD, 0x002F, 0x0039, 0x00E8,
+    0x02C3, 0x078A, 0x0037, 0x0029, 0x0084, 0x01B1, 0x00ED, 0x0086,
+    0x00F9, 0x03AB, 0x01EB, 0x08BC, 0x011E, 0x00F3, 0x0220, 0x058A,
+    0x00EC, 0x008E, 0x012B, 0x01EA, 0x0119, 0x04B0, 0x04B1, 0x03B8,
+    0x0691, 0x0365, 0x01ED, 0x049A, 0x0EA9, 0x0EA8, 0x08BD, 0x24CC,
+    0x0026, 0x0035, 0x00DB, 0x02C4, 0x07B2, 0x0038, 0x002B, 0x007F,
+    0x01B3, 0x00F4, 0x0091, 0x0116, 0x03BB, 0x0215, 0x0932, 0x002D,
+    0x002A, 0x008A, 0x01DE, 0x0028, 0x0020, 0x005C, 0x0090, 0x0068,
+    0x01EE, 0x00E9, 0x008D, 0x012A, 0x0087, 0x005D, 0x0118, 0x0349,
+    0x01EF, 0x01E3, 0x08B9, 0x00F0, 0x00D3, 0x0214, 0x049B, 0x00DA,
+    0x0089, 0x0125, 0x0217, 0x012D, 0x0690, 0x0094, 0x007D, 0x011F,
+    0x007E, 0x0059, 0x0127, 0x01A5, 0x0111, 0x00F8, 0x045D, 0x03B9,
+    0x0259, 0x0580, 0x02C1, 0x01DF, 0x0585, 0x0216, 0x0163, 0x01B0,
+    0x03C4, 0x08B8, 0x078B, 0x0755, 0x0581, 0x0F67, 0x0000,
+};
+static const uint8_t vec4_huffbits[HUFF_VEC4_SIZE] = {
+     5,  6,  8, 10, 12, 13,  6,  6,
+     8, 10, 11,  9,  8,  9, 10, 11,
+    10, 11, 12, 12, 14,  6,  6,  8,
+    10, 11,  6,  6,  8,  9,  8,  8,
+     8, 10,  9, 12,  9,  8, 10, 11,
+     8,  8,  9,  9,  9, 11, 11, 10,
+    11, 10,  9, 11, 12, 12, 12, 14,
+     6,  6,  8, 10, 11,  6,  6,  7,
+     9,  8,  8,  9, 10, 10, 12,  6,
+     6,  8,  9,  6,  6,  7,  8,  7,
+     9,  8,  8,  9,  8,  7,  9, 10,
+     9,  9, 12,  8,  8, 10, 11,  8,
+     8,  9, 10,  9, 11,  8,  7,  9,
+     7,  7,  9,  9,  9,  8, 11, 10,
+    10, 11, 10,  9, 11, 10,  9,  9,
+    10, 12, 11, 11, 11, 12,  1,
+};
+#define HUFF_VEC2_SIZE    137
+#define HUFF_VEC2_MAXBITS  12
+static const uint16_t vec2_huffcodes[HUFF_VEC2_SIZE] = {
+    0x055, 0x01C, 0x01A, 0x02B, 0x028, 0x067, 0x08B, 0x039,
+    0x170, 0x10D, 0x2A5, 0x047, 0x464, 0x697, 0x523, 0x8CB,
+    0x01B, 0x00E, 0x000, 0x010, 0x012, 0x036, 0x048, 0x04C,
+    0x0C2, 0x09B, 0x171, 0x03B, 0x224, 0x34A, 0x2D6, 0x019,
+    0x00F, 0x002, 0x014, 0x017, 0x006, 0x05D, 0x054, 0x0C7,
+    0x0B4, 0x192, 0x10E, 0x233, 0x043, 0x02C, 0x00F, 0x013,
+    0x006, 0x02F, 0x02C, 0x068, 0x077, 0x0DF, 0x111, 0x1A4,
+    0x16A, 0x2A4, 0x027, 0x011, 0x018, 0x02D, 0x00F, 0x04A,
+    0x040, 0x097, 0x01F, 0x11B, 0x022, 0x16D, 0x066, 0x035,
+    0x005, 0x02B, 0x049, 0x009, 0x075, 0x0CB, 0x0AA, 0x187,
+    0x106, 0x08A, 0x047, 0x060, 0x06E, 0x01D, 0x074, 0x0C4,
+    0x01E, 0x118, 0x1A7, 0x038, 0x042, 0x053, 0x076, 0x0A8,
+    0x0CA, 0x082, 0x110, 0x18D, 0x12D, 0x0B9, 0x0C8, 0x0DE,
+    0x01C, 0x0AB, 0x113, 0x18C, 0x10F, 0x09A, 0x0A5, 0x0B7,
+    0x11A, 0x186, 0x1A6, 0x259, 0x153, 0x18A, 0x193, 0x020,
+    0x10C, 0x046, 0x03A, 0x107, 0x149, 0x16C, 0x2D7, 0x225,
+    0x258, 0x316, 0x696, 0x317, 0x042, 0x522, 0x290, 0x8CA,
+    0x001,
+};
+static const uint8_t vec2_huffbits[HUFF_VEC2_SIZE] = {
+     7,  6,  6,  6,  7,  7,  8,  9,
+     9, 10, 10, 11, 11, 11, 12, 12,
+     6,  4,  5,  5,  6,  6,  7,  8,
+     8,  9,  9, 10, 10, 10, 11,  6,
+     4,  5,  5,  6,  7,  7,  8,  8,
+     9,  9, 10, 10, 11,  6,  5,  5,
+     6,  6,  7,  7,  8,  8,  9,  9,
+    10, 10,  7,  6,  6,  6,  7,  7,
+     8,  8,  9,  9, 10, 10,  7,  6,
+     7,  7,  7,  8,  8,  8,  9,  9,
+    10,  8,  7,  7,  7,  8,  8,  8,
+     9,  9,  9,  9,  8,  8,  8,  8,
+     8,  9,  9,  9,  9,  8,  8,  8,
+     9,  9,  9,  9, 10,  9,  9,  9,
+     9,  9,  9, 10,  9,  9,  9, 10,
+    10, 11, 10, 10, 10, 10, 11, 10,
+    10, 10, 11, 10, 11, 12, 11, 12,
+     3,
+};
+#define HUFF_VEC1_SIZE    101
+#define HUFF_VEC1_MAXBITS  11
+static const uint16_t vec1_huffcodes[HUFF_VEC1_SIZE] = {
+    0x01A, 0x003, 0x017, 0x010, 0x00C, 0x009, 0x005, 0x000,
+    0x00D, 0x00A, 0x009, 0x00C, 0x00F, 0x002, 0x004, 0x007,
+    0x00B, 0x00F, 0x01C, 0x006, 0x010, 0x015, 0x01C, 0x022,
+    0x03B, 0x00E, 0x019, 0x023, 0x034, 0x036, 0x03A, 0x047,
+    0x008, 0x00A, 0x01E, 0x031, 0x037, 0x050, 0x053, 0x06B,
+    0x06F, 0x08C, 0x0E8, 0x0EA, 0x0EB, 0x016, 0x03E, 0x03F,
+    0x06C, 0x089, 0x08A, 0x0A3, 0x0A4, 0x0D4, 0x0DD, 0x0EC,
+    0x0EE, 0x11A, 0x1D2, 0x024, 0x025, 0x02E, 0x027, 0x0C2,
+    0x0C0, 0x0DA, 0x0DB, 0x111, 0x144, 0x116, 0x14A, 0x145,
+    0x1B8, 0x1AB, 0x1DA, 0x1DE, 0x1DB, 0x1DF, 0x236, 0x237,
+    0x3A6, 0x3A7, 0x04D, 0x04C, 0x05E, 0x05F, 0x183, 0x182,
+    0x186, 0x221, 0x187, 0x220, 0x22E, 0x22F, 0x296, 0x354,
+    0x297, 0x355, 0x372, 0x373, 0x016,
+};
+static const uint8_t vec1_huffbits[HUFF_VEC1_SIZE] = {
+     7,  6,  5,  5,  5,  5,  5,  5,
+     4,  4,  4,  4,  4,  5,  5,  5,
+     5,  5,  5,  6,  6,  6,  6,  6,
+     6,  7,  7,  7,  7,  7,  7,  7,
+     8,  8,  8,  8,  8,  8,  8,  8,
+     8,  8,  8,  8,  8,  9,  9,  9,
+     9,  9,  9,  9,  9,  9,  9,  9,
+     9,  9,  9, 10, 10, 10, 10, 10,
+    10, 10, 10, 10, 10, 10, 10, 10,
+    10, 10, 10, 10, 10, 10, 10, 10,
+    10, 10, 11, 11, 11, 11, 11, 11,
+    11, 11, 11, 11, 11, 11, 11, 11,
+    11, 11, 11, 11,  5,
+};
+static const uint16_t symbol_to_vec4[HUFF_VEC4_SIZE] = { /* each value reads as four packed hex nibbles, e.g. 4097 == 0x1001 */
+        0,    1,      2,     3,     4,     5,    16,    17,    18,    19,
+       20,   32,     33,    34,    35,    48,    49,    50,    64,    65,
+       80,   256,   257,   258,   259,   260,   272,   273,   274,   275,
+      288,   289,   290,   304,   305,   320,   512,   513,   514,   515,
+      528,   529,   530,   544,   545,   560,   768,   769,   770,   784,
+      785,   800,  1024,  1025,  1040,  1280,  4096,  4097,  4098,  4099,
+     4100,  4112,  4113,  4114,  4115,  4128,  4129,  4130,  4144,  4145,
+     4160,  4352,  4353,  4354,  4355,  4368,  4369,  4370,  4384,  4385,
+     4400,  4608,  4609,  4610,  4624,  4625,  4640,  4864,  4865,  4880,
+     5120,  8192,  8193,  8194,  8195,  8208,  8209,  8210,  8224,  8225,
+     8240,  8448,  8449,  8450,  8464,  8465,  8480,  8704,  8705,  8720,
+     8960, 12288, 12289, 12290, 12304, 12305, 12320, 12544, 12545, 12560,
+    12800, 16384, 16385, 16400, 16640, 20480,     0,
+}; /* NOTE(review): presumably one nibble per vector component; the final 0 lines up with the 1-bit vec4 code -- confirm in decoder */
+static const uint8_t symbol_to_vec2[HUFF_VEC2_SIZE] = { /* each value reads as two packed hex nibbles, e.g. 240 == 0xF0 */
+      0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
+     15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
+     30,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,
+     48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  64,  65,
+     66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  80,  81,  82,  83,  84,
+     85,  86,  87,  88,  89,  90,  96,  97,  98,  99, 100, 101, 102, 103, 104,
+    105, 112, 113, 114, 115, 116, 117, 118, 119, 120, 128, 129, 130, 131, 132,
+    133, 134, 135, 144, 145, 146, 147, 148, 149, 150, 160, 161, 162, 163, 164,
+    165, 176, 177, 178, 179, 180, 192, 193, 194, 195, 208, 209, 210, 224, 225,
+    240,   0,
+}; /* NOTE(review): presumably one nibble per vector component; the final 0 lines up with the last (3-bit) vec2 code -- confirm in decoder */
+/** @} */
+/**
+ * @brief decorrelation matrices for multichannel streams, concatenated into one array
+ */
+static const float default_decorrelation_matrices[] = { /* 91 values; sub-tables start at offsets 0, 1, 5, 14, 30, 55 */
+    1.000000,  0.707031, -0.707031,  0.707031,  0.707031,  0.578125,  0.707031,
+    0.410156,  0.578125, -0.707031,  0.410156,  0.578125,  0.000000, -0.816406,
+    0.500000,  0.652344,  0.500000,  0.269531,  0.500000,  0.269531, -0.500000,
+   -0.652344,  0.500000, -0.269531, -0.500000,  0.652344,  0.500000, -0.652344,
+    0.500000, -0.269531,  0.445312,  0.601562,  0.511719,  0.371094,  0.195312,
+    0.445312,  0.371094, -0.195312, -0.601562, -0.511719,  0.445312,  0.000000,
+   -0.632812,  0.000000,  0.632812,  0.445312, -0.371094, -0.195312,  0.601562,
+   -0.511719,  0.445312, -0.601562,  0.511719, -0.371094,  0.195312,  0.410156,
+    0.558594,  0.500000,  0.410156,  0.289062,  0.148438,  0.410156,  0.410156,
+    0.000000, -0.410156, -0.578125, -0.410156,  0.410156,  0.148438, -0.500000,
+   -0.410156,  0.289062,  0.558594,  0.410156, -0.148438, -0.500000,  0.410156,
+    0.289062, -0.558594,  0.410156, -0.410156,  0.000000,  0.410156, -0.578125,
+    0.410156,  0.410156, -0.558594,  0.500000, -0.410156,  0.289062, -0.148438,
+};
+/**
+ * @brief default decorrelation matrix offsets; entry n begins a span of n*n values
+ */
+static const float * const default_decorrelation[] = { /* NOTE(review): n is presumably the channel count -- confirm in decoder */
+    NULL,                                 /* [0]: no table */
+    &default_decorrelation_matrices[0],   /* [1]:  1 value,  offset  0      */
+    &default_decorrelation_matrices[1],   /* [2]:  4 values, offsets 1..4   */
+    &default_decorrelation_matrices[5],   /* [3]:  9 values, offsets 5..13  */
+    &default_decorrelation_matrices[14],  /* [4]: 16 values, offsets 14..29 */
+    &default_decorrelation_matrices[30],  /* [5]: 25 values, offsets 30..54 */
+    &default_decorrelation_matrices[55]   /* [6]: 36 values, offsets 55..90 */
+};
+#endif /* AVCODEC_WMAPRODATA_H */
diff --git a/apps/codecs/libwmapro/wmaprodec.c b/apps/codecs/libwmapro/wmaprodec.c
new file mode 100644
index 0000000000..66d926d813
--- /dev/null
+++ b/apps/codecs/libwmapro/wmaprodec.c
@@ -0,0 +1,1594 @@
+/*
+ * Wmapro compatible decoder
+ * Copyright (c) 2007 Baptiste Coudurier, Benjamin Larsson, Ulion
+ * Copyright (c) 2008 - 2009 Sascha Sommer, Benjamin Larsson
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/**
+ * @file  libavcodec/wmaprodec.c
+ * @brief wmapro decoder implementation
+ * Wmapro is an MDCT based codec comparable to wma standard or AAC.
+ * The decoding therefore consists of the following steps:
+ * - bitstream decoding
+ * - reconstruction of per-channel data
+ * - rescaling and inverse quantization
+ * - IMDCT
+ * - windowing and overlap-add
+ *
+ * The compressed wmapro bitstream is split into individual packets.
+ * Every such packet contains one or more wma frames.
+ * The compressed frames may have a variable length and frames may
+ * cross packet boundaries.
+ * Common to all wmapro frames is the number of samples that are stored in
+ * a frame.
+ * The number of samples and a few other decode flags are stored
+ * as extradata that has to be passed to the decoder.
+ *
+ * The wmapro frames themselves are again split into a variable number of
+ * subframes. Every subframe contains the data for 2^N time domain samples
+ * where N varies between 7 and 12.
+ *
+ * Example wmapro bitstream (in samples):
+ *
+ * ||   packet 0           || packet 1 || packet 2      packets
+ * ---------------------------------------------------
+ * || frame 0      || frame 1       || frame 2    ||    frames
+ * ---------------------------------------------------
+ * ||   |      |   ||   |   |   |   ||            ||    subframes of channel 0
+ * ---------------------------------------------------
+ * ||      |   |   ||   |   |   |   ||            ||    subframes of channel 1
+ * ---------------------------------------------------
+ *
+ * The frame layouts for the individual channels of a wma frame do not need
+ * to be the same.
+ *
+ * However, if the offsets and lengths of several subframes of a frame are the
+ * same, the subframes of the channels can be grouped.
+ * Every group may then use special coding techniques like M/S stereo coding
+ * to improve the compression ratio. These channel transformations do not
+ * need to be applied to a whole subframe. Instead, they can also work on
+ * individual scale factor bands (see below).
+ * The coefficients that carry the audio signal in the frequency domain
+ * are transmitted as huffman-coded vectors with 4, 2 and 1 elements.
+ * In addition to that, the encoder can switch to a runlevel coding scheme
+ * by transmitting subframe_length / 128 zero coefficients.
+ *
+ * Before the audio signal can be converted to the time domain, the
+ * coefficients have to be rescaled and inverse quantized.
+ * A subframe is therefore split into several scale factor bands that get
+ * scaled individually.
+ * Scale factors are submitted for every frame but they might be shared
+ * between the subframes of a channel. Scale factors are initially DPCM-coded.
+ * Once scale factors are shared, the differences are transmitted as runlevel
+ * codes.
+ * Every subframe length and offset combination in the frame layout shares a
+ * common quantization factor that can be adjusted for every channel by a
+ * modifier.
+ * After the inverse quantization, the coefficients get processed by an IMDCT.
+ * The resulting values are then windowed with a sine window and the first half
+ * of the values are added to the second half of the output from the previous
+ * subframe in order to reconstruct the output samples.
+ */
+#include "avcodec.h"
+#include "internal.h"
+#include "get_bits.h"
+#include "put_bits.h"
+#include "wmaprodata.h"
+#include "dsputil.h"
+#include "wma.h"
+/* Local stand-ins for definitions this file needs in order to compile */
+#define AVERROR_INVALIDDATA  -1
+#define AVERROR_PATCHWELCOME -2
+#ifndef M_PI
+#define M_PI           3.14159265358979323846  /* pi */
+#endif
+#define av_log_ask_for_sample(...)              /* stubbed out: expands to nothing */
+/** current decoder limitations */
+#define WMAPRO_MAX_CHANNELS    8                             ///< max number of handled channels
+#define MAX_SUBFRAMES  32                                    ///< max number of subframes per channel
+#define MAX_BANDS      29                                    ///< max number of scale factor bands
+#define MAX_FRAMESIZE  32768                                 ///< maximum compressed frame size
+#define WMAPRO_BLOCK_MAX_BITS 12                                           ///< log2 of max block size
+#define WMAPRO_BLOCK_MAX_SIZE (1 << WMAPRO_BLOCK_MAX_BITS)                 ///< maximum block size
+#define WMAPRO_BLOCK_SIZES    (WMAPRO_BLOCK_MAX_BITS - BLOCK_MIN_BITS + 1) ///< possible block sizes
+#define VLCBITS            9                                 ///< table bits passed to INIT_VLC_STATIC for all but the scale factor DPCM vlc
+#define SCALEVLCBITS       8                                 ///< table bits passed to INIT_VLC_STATIC for the scale factor DPCM vlc
+#define VEC4MAXDEPTH    ((HUFF_VEC4_MAXBITS+VLCBITS-1)/VLCBITS)            ///< ceil(HUFF_VEC4_MAXBITS / VLCBITS)
+#define VEC2MAXDEPTH    ((HUFF_VEC2_MAXBITS+VLCBITS-1)/VLCBITS)            ///< ceil(HUFF_VEC2_MAXBITS / VLCBITS)
+#define VEC1MAXDEPTH    ((HUFF_VEC1_MAXBITS+VLCBITS-1)/VLCBITS)            ///< ceil(HUFF_VEC1_MAXBITS / VLCBITS)
+#define SCALEMAXDEPTH   ((HUFF_SCALE_MAXBITS+SCALEVLCBITS-1)/SCALEVLCBITS) ///< ceil(HUFF_SCALE_MAXBITS / SCALEVLCBITS)
+#define SCALERLMAXDEPTH ((HUFF_SCALE_RL_MAXBITS+VLCBITS-1)/VLCBITS)        ///< ceil(HUFF_SCALE_RL_MAXBITS / VLCBITS)
+static VLC              sf_vlc;           ///< scale factor DPCM vlc
+static VLC              sf_rl_vlc;        ///< scale factor run length vlc
+static VLC              vec4_vlc;         ///< 4 coefficients per symbol
+static VLC              vec2_vlc;         ///< 2 coefficients per symbol
+static VLC              vec1_vlc;         ///< 1 coefficient per symbol
+static VLC              coef_vlc[2];      ///< coefficient run length vlc codes
+static float            sin64[33];        ///< sine table for decorrelation: sin(i * pi / 64) for i = 0..32, filled in decode_init()
+/**
+ * @brief frame specific decoder context for a single channel
+ */
+typedef struct {
+    int16_t  prev_block_len;                          ///< length of the previous block (starts out as samples_per_frame)
+    uint8_t  transmit_coefs;                          ///< NOTE(review): presumably set when coefficients are transmitted for the channel - not used in this part of the file, confirm
+    uint8_t  num_subframes;                           ///< number of subframes of this channel in the current frame (rebuilt by decode_tilehdr())
+    uint16_t subframe_len[MAX_SUBFRAMES];             ///< subframe length in samples
+    uint16_t subframe_offset[MAX_SUBFRAMES];          ///< subframe positions in the current frame
+    uint8_t  cur_subframe;                            ///< current subframe number
+    uint16_t decoded_samples;                         ///< number of already processed samples
+    uint8_t  grouped;                                 ///< channel is part of a group
+    int      quant_step;                              ///< quantization step for the current subframe
+    int8_t   reuse_sf;                                ///< share scale factors between subframes
+    int8_t   scale_factor_step;                       ///< scaling step for the current subframe
+    int      max_scale_factor;                        ///< maximum scale factor for the current subframe
+    int      saved_scale_factors[2][MAX_BANDS];       ///< resampled and (previously) transmitted scale factor values
+    int8_t   scale_factor_idx;                        ///< index for the transmitted scale factor values (used for resampling)
+    int*     scale_factors;                           ///< pointer to the scale factor values used for decoding
+    uint8_t  table_idx;                               ///< index in sf_offsets for the scale factor reference block
+    float*   coeffs;                                  ///< pointer to the subframe decode buffer
+    DECLARE_ALIGNED(16, float, out)[WMAPRO_BLOCK_MAX_SIZE + WMAPRO_BLOCK_MAX_SIZE / 2]; ///< output buffer
+} WMAProChannelCtx;
+/**
+ * @brief channel group for channel transformations
+ */
+typedef struct {
+    uint8_t num_channels;                                     ///< number of channels in the group
+    int8_t  transform;                                        ///< transform on / off
+    int8_t  transform_band[MAX_BANDS];                        ///< controls if the transform is enabled for a certain band
+    float   decorrelation_matrix[WMAPRO_MAX_CHANNELS*WMAPRO_MAX_CHANNELS]; ///< num_channels x num_channels matrix, element (row, col) at [row * num_channels + col]
+    float*  channel_data[WMAPRO_MAX_CHANNELS];                ///< coeffs buffers of the channels that belong to the group
+} WMAProChannelGrp;
+/**
+ * @brief main decoder context
+ */
+typedef struct WMAProDecodeCtx {
+    /* generic decoder variables */
+    AVCodecContext*  avctx;                         ///< codec context for av_log
+    DSPContext       dsp;                           ///< accelerated DSP functions
+    uint8_t          frame_data[MAX_FRAMESIZE +
+                      FF_INPUT_BUFFER_PADDING_SIZE];///< compressed frame data
+    PutBitContext    pb;                            ///< context for filling the frame_data buffer
+    FFTContext       mdct_ctx[WMAPRO_BLOCK_SIZES];  ///< MDCT context per block size
+    DECLARE_ALIGNED(16, float, tmp)[WMAPRO_BLOCK_MAX_SIZE]; ///< IMDCT output buffer
+    float*           windows[WMAPRO_BLOCK_SIZES];   ///< windows for the different block sizes
+    /* frame size dependent frame information (set during initialization) */
+    uint32_t         decode_flags;                  ///< used compression features
+    uint8_t          len_prefix;                    ///< frame is prefixed with its length
+    uint8_t          dynamic_range_compression;     ///< frame contains DRC data
+    uint8_t          bits_per_sample;               ///< integer audio sample size for the unscaled IMDCT output (used to scale to [-1.0, 1.0])
+    uint16_t         samples_per_frame;             ///< number of samples to output
+    uint16_t         log2_frame_size;               ///< av_log2(block_align) + 4, set in decode_init()
+    int8_t           num_channels;                  ///< number of channels in the stream (same as AVCodecContext.channels)
+    int8_t           lfe_channel;                   ///< lfe channel index
+    uint8_t          max_num_subframes;             ///< 1 << bits 3..5 of decode_flags
+    uint8_t          subframe_len_bits;             ///< number of bits used for the subframe length
+    uint8_t          max_subframe_len_bit;          ///< when set, the subframe length starts with one bit: 0 selects the maximum size, 1 is followed by an explicit shift
+    uint16_t         min_samples_per_subframe;      ///< samples_per_frame / max_num_subframes
+    int8_t           num_sfb[WMAPRO_BLOCK_SIZES];   ///< scale factor bands per block size
+    int16_t          sfb_offsets[WMAPRO_BLOCK_SIZES][MAX_BANDS];                    ///< scale factor band offsets (multiples of 4)
+    int8_t           sf_offsets[WMAPRO_BLOCK_SIZES][WMAPRO_BLOCK_SIZES][MAX_BANDS]; ///< scale factor resample matrix
+    int16_t          subwoofer_cutoffs[WMAPRO_BLOCK_SIZES]; ///< subwoofer cutoff values
+    /* packet decode state */
+    GetBitContext    pgb;                           ///< bitstream reader context for the packet
+    uint8_t          packet_offset;                 ///< frame offset in the packet
+    uint8_t          packet_sequence_number;        ///< current packet number
+    int              num_saved_bits;                ///< saved number of bits
+    int              frame_offset;                  ///< frame offset in the bit reservoir
+    int              subframe_offset;               ///< subframe offset in the bit reservoir
+    uint8_t          packet_loss;                   ///< set in case of bitstream error
+    uint8_t          packet_done;                   ///< set when a packet is fully decoded
+    /* frame decode state */
+    uint32_t         frame_num;                     ///< current frame number (not used for decoding)
+    GetBitContext    gb;                            ///< bitstream reader context
+    int              buf_bit_size;                  ///< buffer size in bits
+    float*           samples;                       ///< current samplebuffer pointer
+    float*           samples_end;                   ///< maximum samplebuffer pointer
+    uint8_t          drc_gain;                      ///< gain for the DRC tool
+    int8_t           skip_frame;                    ///< skip output step
+    int8_t           parsed_all_subframes;          ///< all subframes decoded?
+    /* subframe/block decode state */
+    int16_t          subframe_len;                  ///< current subframe length
+    int8_t           channels_for_cur_subframe;     ///< number of channels that contain the subframe
+    int8_t           channel_indexes_for_cur_subframe[WMAPRO_MAX_CHANNELS]; ///< indexes into channel[] of the channels that contain the subframe
+    int8_t           num_bands;                     ///< number of scale factor bands
+    int16_t*         cur_sfb_offsets;               ///< sfb offsets for the current block
+    uint8_t          table_idx;                     ///< index for the num_sfb, sfb_offsets, sf_offsets and subwoofer_cutoffs tables
+    int8_t           esc_len;                       ///< length of escaped coefficients
+    uint8_t          num_chgroups;                  ///< number of channel groups
+    WMAProChannelGrp chgroup[WMAPRO_MAX_CHANNELS];  ///< channel group information
+    WMAProChannelCtx channel[WMAPRO_MAX_CHANNELS];  ///< per channel data
+} WMAProDecodeCtx;
+/**
+ *@brief helper function to log the most important members of the context at AV_LOG_DEBUG level
+ *@param s context
+ */
+static void av_cold dump_context(WMAProDecodeCtx *s)
+{
+#define PRINT(a, b)     av_log(s->avctx, AV_LOG_DEBUG, " %s = %d\n", a, b);
+#define PRINT_HEX(a, b) av_log(s->avctx, AV_LOG_DEBUG, " %s = %x\n", a, b);
+    PRINT("ed sample bit depth", s->bits_per_sample); /* "ed" presumably stands for extradata - TODO confirm */
+    PRINT_HEX("ed decode flags", s->decode_flags);
+    PRINT("samples per frame",   s->samples_per_frame);
+    PRINT("log2 frame size",     s->log2_frame_size);
+    PRINT("max num subframes",   s->max_num_subframes);
+    PRINT("len prefix",          s->len_prefix);
+    PRINT("num channels",        s->num_channels);
+}
+/**
+ *@brief Tear down the decoder: release the MDCT context of every block size.
+ *@param avctx codec context whose priv_data is the WMAProDecodeCtx
+ *@return always 0
+ */
+static av_cold int decode_end(AVCodecContext *avctx)
+{
+    WMAProDecodeCtx *ctx = avctx->priv_data;
+    int size_idx = 0;
+    while (size_idx < WMAPRO_BLOCK_SIZES)
+        ff_mdct_end(&ctx->mdct_ctx[size_idx++]);
+    return 0;
+}
+/**
+ *@brief Initialize the decoder from the codec context and its extradata (at least 18 bytes are required).
+ *@param avctx codec context; its channel count is validated before any per-channel state is written
+ *@return 0 on success, AVERROR_INVALIDDATA or AVERROR_PATCHWELCOME (both < 0) otherwise
+ */
+static av_cold int decode_init(AVCodecContext *avctx)
+{
+    WMAProDecodeCtx *s = avctx->priv_data;
+    uint8_t *edata_ptr = avctx->extradata;
+    unsigned int channel_mask;
+    int i;
+    int log2_max_num_subframes;
+    int num_possible_block_sizes;
+    s->avctx = avctx;
+    dsputil_init(&s->dsp, avctx);
+    init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE);
+    avctx->sample_fmt = SAMPLE_FMT_FLT;
+    if (avctx->extradata_size >= 18) {
+        s->decode_flags    = AV_RL16(edata_ptr+14);
+        channel_mask       = AV_RL32(edata_ptr+2);
+        s->bits_per_sample = AV_RL16(edata_ptr); /** NOTE(review): not range-checked, yet used as a shift count for the MDCT scale below */
+        /** dump the extradata */
+        for (i = 0; i < avctx->extradata_size; i++)
+            dprintf(avctx, "[%x] ", avctx->extradata[i]);
+        dprintf(avctx, "\n");
+    } else {
+        av_log_ask_for_sample(avctx, "Unknown extradata size\n");
+        return AVERROR_INVALIDDATA;
+    }
+    /** generic init */
+    s->log2_frame_size = av_log2(avctx->block_align) + 4;
+    /** frame info */
+    s->skip_frame  = 1; /** skip first frame */
+    s->packet_loss = 1;
+    s->len_prefix  = (s->decode_flags & 0x40);
+    if (!s->len_prefix) {
+        av_log_ask_for_sample(avctx, "no length prefix\n");
+        return AVERROR_INVALIDDATA;
+    }
+    /** get frame len */
+    s->samples_per_frame = 1 << ff_wma_get_frame_len_bits(avctx->sample_rate,
+                                                          3, s->decode_flags);
+    if (avctx->channels <= 0) { /** checked first: the count bounds every access to s->channel[] */
+        av_log(avctx, AV_LOG_ERROR, "invalid number of channels %d\n", avctx->channels);
+        return AVERROR_INVALIDDATA;
+    } else if (avctx->channels > WMAPRO_MAX_CHANNELS) {
+        av_log_ask_for_sample(avctx, "unsupported number of channels\n");
+        return AVERROR_PATCHWELCOME;
+    }
+    s->num_channels = avctx->channels; /** now known to be within 1..WMAPRO_MAX_CHANNELS */
+    /** init previous block len */
+    for (i = 0; i < s->num_channels; i++)
+        s->channel[i].prev_block_len = s->samples_per_frame;
+    /** subframe info */
+    log2_max_num_subframes       = ((s->decode_flags & 0x38) >> 3);
+    s->max_num_subframes         = 1 << log2_max_num_subframes;
+    /** the leading subframe length bit is only used with 16 subframes; always assigned so no stale value survives */
+    s->max_subframe_len_bit = (s->max_num_subframes == 16);
+    s->subframe_len_bits = av_log2(log2_max_num_subframes) + 1;
+    num_possible_block_sizes     = log2_max_num_subframes + 1;
+    s->min_samples_per_subframe  = s->samples_per_frame / s->max_num_subframes;
+    s->dynamic_range_compression = (s->decode_flags & 0x80);
+    if (s->max_num_subframes > MAX_SUBFRAMES) {
+        av_log(avctx, AV_LOG_ERROR, "invalid number of subframes %i\n",
+               s->max_num_subframes);
+        return AVERROR_INVALIDDATA;
+    }
+    /** extract lfe channel position: count the channels present in mask bits 0..3 when bit 3 is set */
+    s->lfe_channel = -1;
+    if (channel_mask & 8) {
+        unsigned int mask;
+        for (mask = 1; mask < 16; mask <<= 1) {
+            if (channel_mask & mask)
+                ++s->lfe_channel;
+        }
+    }
+    INIT_VLC_STATIC(&sf_vlc, SCALEVLCBITS, HUFF_SCALE_SIZE,
+                    scale_huffbits, 1, 1,
+                    scale_huffcodes, 2, 2, 616);
+    INIT_VLC_STATIC(&sf_rl_vlc, VLCBITS, HUFF_SCALE_RL_SIZE,
+                    scale_rl_huffbits, 1, 1,
+                    scale_rl_huffcodes, 4, 4, 1406);
+    INIT_VLC_STATIC(&coef_vlc[0], VLCBITS, HUFF_COEF0_SIZE,
+                    coef0_huffbits, 1, 1,
+                    coef0_huffcodes, 4, 4, 2108);
+    INIT_VLC_STATIC(&coef_vlc[1], VLCBITS, HUFF_COEF1_SIZE,
+                    coef1_huffbits, 1, 1,
+                    coef1_huffcodes, 4, 4, 3912);
+    INIT_VLC_STATIC(&vec4_vlc, VLCBITS, HUFF_VEC4_SIZE,
+                    vec4_huffbits, 1, 1,
+                    vec4_huffcodes, 2, 2, 604);
+    INIT_VLC_STATIC(&vec2_vlc, VLCBITS, HUFF_VEC2_SIZE,
+                    vec2_huffbits, 1, 1,
+                    vec2_huffcodes, 2, 2, 562);
+    INIT_VLC_STATIC(&vec1_vlc, VLCBITS, HUFF_VEC1_SIZE,
+                    vec1_huffbits, 1, 1,
+                    vec1_huffcodes, 2, 2, 562);
+    /** calculate number of scale factor bands and their offsets
+        for every possible block size */
+    for (i = 0; i < num_possible_block_sizes; i++) {
+        int subframe_len = s->samples_per_frame >> i;
+        int x;
+        int band = 1;
+        s->sfb_offsets[i][0] = 0;
+        for (x = 0; x < MAX_BANDS-1 && s->sfb_offsets[i][band - 1] < subframe_len; x++) {
+            int offset = (subframe_len * 2 * critical_freq[x])
+                          / s->avctx->sample_rate + 2;
+            offset &= ~3;
+            if (offset > s->sfb_offsets[i][band - 1])
+                s->sfb_offsets[i][band++] = offset;
+        }
+        s->sfb_offsets[i][band - 1] = subframe_len;
+        s->num_sfb[i]               = band - 1;
+    }
+    /** Scale factors can be shared between blocks of different size
+        as every block has a different scale factor band layout.
+        The matrix sf_offsets is needed to find the correct scale factor.
+     */
+    for (i = 0; i < num_possible_block_sizes; i++) {
+        int b;
+        for (b = 0; b < s->num_sfb[i]; b++) {
+            int x;
+            int offset = ((s->sfb_offsets[i][b]
+                           + s->sfb_offsets[i][b + 1] - 1) << i) >> 1;
+            for (x = 0; x < num_possible_block_sizes; x++) {
+                int v = 0;
+                while (s->sfb_offsets[x][v + 1] << x < offset)
+                    ++v;
+                s->sf_offsets[i][x][b] = v;
+            }
+        }
+    }
+    /** init MDCT, FIXME: only init needed sizes */
+    for (i = 0; i < WMAPRO_BLOCK_SIZES; i++)
+        ff_mdct_init(&s->mdct_ctx[i], BLOCK_MIN_BITS+1+i, 1,
+                     1.0 / (1 << (BLOCK_MIN_BITS + i - 1))
+                     / (1 << (s->bits_per_sample - 1)));
+    /** init MDCT windows: simple sine window */
+    for (i = 0; i < WMAPRO_BLOCK_SIZES; i++) {
+        const int win_idx = WMAPRO_BLOCK_MAX_BITS - i;
+        ff_init_ff_sine_windows(win_idx);
+        s->windows[WMAPRO_BLOCK_SIZES - i - 1] = ff_sine_windows[win_idx];
+    }
+    /** calculate subwoofer cutoff values */
+    for (i = 0; i < num_possible_block_sizes; i++) {
+        int block_size = s->samples_per_frame >> i;
+        int cutoff = (440*block_size + 3 * (s->avctx->sample_rate >> 1) - 1)
+                     / s->avctx->sample_rate;
+        s->subwoofer_cutoffs[i] = av_clip(cutoff, 4, block_size);
+    }
+    /** calculate sine values for the decorrelation matrix */
+    for (i = 0; i < 33; i++)
+        sin64[i] = sin(i*M_PI / 64.0);
+#if 0
+    if (avctx->debug & FF_DEBUG_BITSTREAM)
+        dump_context(s);
+#endif
+    avctx->channel_layout = channel_mask;
+    return 0;
+}
+/**
+ *@brief Read the length of the next subframe from the bitstream.
+ *@param s context
+ *@param offset position (in samples) inside the frame where the subframe starts
+ *@return subframe length in samples on success, < 0 in case of an error
+ */
+static int decode_subframe_length(WMAProDecodeCtx *s, int offset)
+{
+    const int frame_len = s->samples_per_frame;
+    const int min_len   = s->min_samples_per_subframe;
+    int shift = 0;
+    int len;
+    /** the last minimum-sized slot leaves a single choice: nothing is read */
+    if (offset == frame_len - min_len)
+        return min_len;
+    /** with max_subframe_len_bit a 1-bit prefix comes first:
+        0 keeps the full frame length, 1 is followed by an explicit shift */
+    if (!s->max_subframe_len_bit)
+        shift = get_bits(&s->gb, s->subframe_len_bits);
+    else if (get_bits1(&s->gb))
+        shift = 1 + get_bits(&s->gb, s->subframe_len_bits-1);
+    len = frame_len >> shift;
+    /** only lengths within [min_len, frame_len] are accepted */
+    if (len >= min_len && len <= frame_len)
+        return len;
+    av_log(s->avctx, AV_LOG_ERROR, "broken frame: subframe_len %i\n",
+           len);
+    return AVERROR_INVALIDDATA;
+}
+/**
+ *@brief Decode how the data in the frame is split into subframes.
+ *       Every WMA frame contains the encoded data for a fixed number of
+ *       samples per channel. The data for every channel might be split
+ *       into several subframes. This function will reconstruct the list of
+ *       subframes for every channel.
+ *
+ *       If the subframes are not evenly split, the algorithm determines the
+ *       channels with the lowest number of total samples.
+ *       Afterwards, for each of these channels a bit is read from the
+ *       bitstream that indicates if the channel contains a subframe with the
+ *       next subframe size that is going to be read from the bitstream or not.
+ *       If a channel contains such a subframe, the subframe size gets added to
+ *       the channel's subframe list.
+ *       The algorithm repeats these steps until the frame is properly divided
+ *       between the individual channels.
+ *
+ *@param s context; num_subframes, subframe_len and subframe_offset of every channel are rewritten
+ *@return 0 on success, AVERROR_INVALIDDATA (< 0) in case of an error
+ */
+static int decode_tilehdr(WMAProDecodeCtx *s)
+{
+    uint16_t num_samples[WMAPRO_MAX_CHANNELS];        /** sum of samples for all currently known subframes of a channel */
+    uint8_t  contains_subframe[WMAPRO_MAX_CHANNELS];  /** flag indicating if a channel contains the current subframe */
+    int channels_for_cur_subframe = s->num_channels;  /** number of channels that contain the current subframe */
+    int fixed_channel_layout = 0;                     /** flag indicating that all channels use the same subframe offsets and sizes */
+    int min_channel_len = 0;                          /** smallest sum of samples (channels with this length will be processed first) */
+    int c;
+    /* Should never consume more than 3073 bits (256 iterations for the
+     * while loop when always the minimum amount of 128 samples is subtracted
+     * from missing samples in the 8 channel case).
+     * 1 + BLOCK_MAX_SIZE * MAX_CHANNELS / BLOCK_MIN_SIZE * (MAX_CHANNELS  + 4)
+     */
+    /** reset tiling information */
+    for (c = 0; c < s->num_channels; c++)
+        s->channel[c].num_subframes = 0;
+    memset(num_samples, 0, sizeof(num_samples));
+    if (s->max_num_subframes == 1 || get_bits1(&s->gb)) /** the layout bit is only read when more than one subframe is possible */
+        fixed_channel_layout = 1;
+    /** loop until the frame data is split between the subframes */
+    do {
+        int subframe_len;
+        /** check which channels contain the subframe */
+        for (c = 0; c < s->num_channels; c++) {
+            if (num_samples[c] == min_channel_len) {
+                if (fixed_channel_layout || channels_for_cur_subframe == 1 ||
+                   (min_channel_len == s->samples_per_frame - s->min_samples_per_subframe))
+                    contains_subframe[c] = 1; /** membership is implied, no bit is read */
+                else
+                    contains_subframe[c] = get_bits1(&s->gb);
+            } else
+                contains_subframe[c] = 0;
+        }
+        /** get subframe length, subframe_len == 0 is not allowed */
+        if ((subframe_len = decode_subframe_length(s, min_channel_len)) <= 0)
+            return AVERROR_INVALIDDATA;
+        /** add subframes to the individual channels and find new min_channel_len */
+        min_channel_len += subframe_len;
+        for (c = 0; c < s->num_channels; c++) {
+            WMAProChannelCtx* chan = &s->channel[c];
+            if (contains_subframe[c]) {
+                if (chan->num_subframes >= MAX_SUBFRAMES) {
+                    av_log(s->avctx, AV_LOG_ERROR,
+                           "broken frame: num subframes > 31\n");
+                    return AVERROR_INVALIDDATA;
+                }
+                chan->subframe_len[chan->num_subframes] = subframe_len;
+                num_samples[c] += subframe_len;
+                ++chan->num_subframes;
+                if (num_samples[c] > s->samples_per_frame) {
+                    av_log(s->avctx, AV_LOG_ERROR, "broken frame: "
+                           "channel len > samples_per_frame\n");
+                    return AVERROR_INVALIDDATA;
+                }
+            } else if (num_samples[c] <= min_channel_len) {
+                if (num_samples[c] < min_channel_len) { /** new minimum found: restart the count of channels sharing it */
+                    channels_for_cur_subframe = 0;
+                    min_channel_len = num_samples[c];
+                }
+                ++channels_for_cur_subframe;
+            }
+        }
+    } while (min_channel_len < s->samples_per_frame);
+    for (c = 0; c < s->num_channels; c++) { /** turn the per-channel lengths into running start offsets */
+        int i;
+        int offset = 0;
+        for (i = 0; i < s->channel[c].num_subframes; i++) {
+            dprintf(s->avctx, "frame[%i] channel[%i] subframe[%i]"
+                    " len %i\n", s->frame_num, c, i,
+                    s->channel[c].subframe_len[i]);
+            s->channel[c].subframe_offset[i] = offset;
+            offset += s->channel[c].subframe_len[i];
+        }
+    }
+    return 0;
+}
+/**
+ *@brief Calculate a decorrelation matrix from the bitstream parameters: 6-bit rotation values followed by one sign bit per channel.
+ *@param s codec context
+ *@param chgroup channel group for which the matrix needs to be calculated; its decorrelation_matrix is overwritten
+ */
+static void decode_decorrelation_matrix(WMAProDecodeCtx *s,
+                                        WMAProChannelGrp *chgroup)
+{
+    int i;
+    int offset = 0; /** index of the first rotation value used for row i */
+    int8_t rotation_offset[WMAPRO_MAX_CHANNELS * WMAPRO_MAX_CHANNELS];
+    memset(chgroup->decorrelation_matrix, 0, s->num_channels *
+           s->num_channels * sizeof(*chgroup->decorrelation_matrix));
+    for (i = 0; i < chgroup->num_channels * (chgroup->num_channels - 1) >> 1; i++) /** n * (n - 1) / 2 rotation values */
+        rotation_offset[i] = get_bits(&s->gb, 6);
+    for (i = 0; i < chgroup->num_channels; i++) /** diagonal starts as +1.0 or -1.0, one bit each */
+        chgroup->decorrelation_matrix[chgroup->num_channels * i + i] =
+            get_bits1(&s->gb) ? 1.0 : -1.0;
+    for (i = 1; i < chgroup->num_channels; i++) {
+        int x;
+        for (x = 0; x < i; x++) {
+            int y;
+            for (y = 0; y < i + 1; y++) {
+                float v1 = chgroup->decorrelation_matrix[x * chgroup->num_channels + y];
+                float v2 = chgroup->decorrelation_matrix[i * chgroup->num_channels + y];
+                int n = rotation_offset[offset + x];
+                float sinv;
+                float cosv;
+                if (n < 32) { /** sin64[k] holds sin(k * pi / 64), so sin64[32 - n] is the cosine of the same angle */
+                    sinv = sin64[n];
+                    cosv = sin64[32 - n];
+                } else { /** n in 32..63: mirrored sine lookup, cosine taken negative */
+                    sinv =  sin64[64 -  n];
+                    cosv = -sin64[n  - 32];
+                }
+                chgroup->decorrelation_matrix[y + x * chgroup->num_channels] =
+                                               (v1 * sinv) - (v2 * cosv);
+                chgroup->decorrelation_matrix[y + i * chgroup->num_channels] =
+                                               (v1 * cosv) + (v2 * sinv);
+            }
+        }
+        offset += i; /** row i used i rotation values */
+    }
+}
+/**
+ *@brief Decode channel transformation parameters
+ *@param s codec context
+ *@return 0 in case of success, < 0 in case of bitstream errors
+ */
+static int decode_channel_transform(WMAProDecodeCtx* s)
+{
+    int i;
+    /* should never consume more than 1921 bits for the 8 channel case
+     * 1 + MAX_CHANNELS * (MAX_CHANNELS + 2 + 3 * MAX_CHANNELS * MAX_CHANNELS
+     * + MAX_CHANNELS + MAX_BANDS + 1)
+     */
+    /** in the one channel case channel transforms are pointless */
+    s->num_chgroups = 0;
+    if (s->num_channels > 1) {
+        int remaining_channels = s->channels_for_cur_subframe;
+        if (get_bits1(&s->gb)) {
+            av_log_ask_for_sample(s->avctx,
+                                  "unsupported channel transform bit\n");
+            return AVERROR_INVALIDDATA;
+        }
+        for (s->num_chgroups = 0; remaining_channels &&
+             s->num_chgroups < s->channels_for_cur_subframe; s->num_chgroups++) {
+            WMAProChannelGrp* chgroup = &s->chgroup[s->num_chgroups];
+            float** channel_data = chgroup->channel_data;
+            chgroup->num_channels = 0;
+            chgroup->transform = 0;
+            /** decode channel mask */
+            if (remaining_channels > 2) {
+                for (i = 0; i < s->channels_for_cur_subframe; i++) {
+                    int channel_idx = s->channel_indexes_for_cur_subframe[i];
+                    if (!s->channel[channel_idx].grouped
+                        && get_bits1(&s->gb)) {
+                        ++chgroup->num_channels;
+                        s->channel[channel_idx].grouped = 1;
+                        *channel_data++ = s->channel[channel_idx].coeffs;
+                    }
+                }
+            } else {
+                chgroup->num_channels = remaining_channels;
+                for (i = 0; i < s->channels_for_cur_subframe; i++) {
+                    int channel_idx = s->channel_indexes_for_cur_subframe[i];
+                    if (!s->channel[channel_idx].grouped)
+                        *channel_data++ = s->channel[channel_idx].coeffs;
+                    s->channel[channel_idx].grouped = 1;
+                }
+            }
+            /** decode transform type */
+            if (chgroup->num_channels == 2) {
+                if (get_bits1(&s->gb)) {
+                    if (get_bits1(&s->gb)) {
+                        av_log_ask_for_sample(s->avctx,
+                                              "unsupported channel transform type\n");
+                    }
+                } else {
+                    chgroup->transform = 1;
+                    if (s->num_channels == 2) {
+                        chgroup->decorrelation_matrix[0] =  1.0;
+                        chgroup->decorrelation_matrix[1] = -1.0;
+                        chgroup->decorrelation_matrix[2] =  1.0;
+                        chgroup->decorrelation_matrix[3] =  1.0;
+                    } else {
+                        /** cos(pi/4) */
+                        chgroup->decorrelation_matrix[0] =  0.70703125;
+                        chgroup->decorrelation_matrix[1] = -0.70703125;
+                        chgroup->decorrelation_matrix[2] =  0.70703125;
+                        chgroup->decorrelation_matrix[3] =  0.70703125;
+                    }
+                }
+            } else if (chgroup->num_channels > 2) {
+                if (get_bits1(&s->gb)) {
+                    chgroup->transform = 1;
+                    if (get_bits1(&s->gb)) {
+                        decode_decorrelation_matrix(s, chgroup);
+                    } else {
+                        /** FIXME: more than 6 coupled channels not supported */
+                        if (chgroup->num_channels > 6) {
+                            av_log_ask_for_sample(s->avctx,
+                                                  "coupled channels > 6\n");
+                        } else {
+                            memcpy(chgroup->decorrelation_matrix,
+                                   default_decorrelation[chgroup->num_channels],
+                                   chgroup->num_channels * chgroup->num_channels *
+                                   sizeof(*chgroup->decorrelation_matrix));
+                        }
+                    }
+                }
+            }
+            /** decode transform on / off */
+            if (chgroup->transform) {
+                if (!get_bits1(&s->gb)) {
+                    int i;
+                    /** transform can be enabled for individual bands */
+                    for (i = 0; i < s->num_bands; i++) {
+                        chgroup->transform_band[i] = get_bits1(&s->gb);
+                    }
+                } else {
+                    memset(chgroup->transform_band, 1, s->num_bands);
+                }
+            }
+            remaining_channels -= chgroup->num_channels;
+        }
+    }
+    return 0;
+}
+/**
+ *@brief Extract the coefficients from the bitstream.
+ *
+ * Coefficients are first read in vector coded groups of four. As soon as
+ * more than (subframe_len >> 8) zero coefficients were seen in a row, the
+ * rest of the subframe is cleared and handed to the run level decoder.
+ * Float bit patterns are moved with memcpy and the sign is applied with
+ * unsigned arithmetic, so no negative value is shifted and no object is
+ * accessed through a pointer of an incompatible type.
+ *@param s codec context
+ *@param c current channel number
+ *@return 0 on success, < 0 in case of bitstream errors
+ */
+static int decode_coeffs(WMAProDecodeCtx *s, int c)
+{
+    /* Integers 0..15 as single-precision float bit patterns.  The table
+       saves a costly int to float conversion, and storing the values as
+       integers allows fast sign-flipping. */
+    static const uint32_t fval_tab[16] = {
+        0x00000000, 0x3f800000, 0x40000000, 0x40400000,
+        0x40800000, 0x40a00000, 0x40c00000, 0x40e00000,
+        0x41000000, 0x41100000, 0x41200000, 0x41300000,
+        0x41400000, 0x41500000, 0x41600000, 0x41700000,
+    };
+    int vlctable;
+    VLC* vlc;
+    WMAProChannelCtx* ci = &s->channel[c];
+    int rl_mode = 0;
+    int cur_coeff = 0;
+    int num_zeros = 0;
+    const uint16_t* run;
+    const float* level;
+    dprintf(s->avctx, "decode coefficients for channel %i\n", c);
+    /** one bit selects the table set used by the run level decoder */
+    vlctable = get_bits1(&s->gb);
+    vlc = &coef_vlc[vlctable];
+    if (vlctable) {
+        run = coef1_run;
+        level = coef1_level;
+    } else {
+        run = coef0_run;
+        level = coef0_level;
+    }
+    /** decode vector coefficients (consumes up to 167 bits per iteration for
+      4 vector coded large values) */
+    while (!rl_mode && cur_coeff + 3 < s->subframe_len) {
+        /* float bit patterns of the four magnitudes */
+        uint32_t vals[4];
+        int i;
+        unsigned int idx;
+        idx = get_vlc2(&s->gb, vec4_vlc.table, VLCBITS, VEC4MAXDEPTH);
+        if (idx == HUFF_VEC4_SIZE - 1) {
+            /** escape: the group is coded as two vectors of two values */
+            for (i = 0; i < 4; i += 2) {
+                idx = get_vlc2(&s->gb, vec2_vlc.table, VLCBITS, VEC2MAXDEPTH);
+                if (idx == HUFF_VEC2_SIZE - 1) {
+                    /** escape: both values are coded individually */
+                    int v0, v1;
+                    float f0, f1;
+                    v0 = get_vlc2(&s->gb, vec1_vlc.table, VLCBITS, VEC1MAXDEPTH);
+                    if (v0 == HUFF_VEC1_SIZE - 1)
+                        v0 += ff_wma_get_large_val(&s->gb);
+                    v1 = get_vlc2(&s->gb, vec1_vlc.table, VLCBITS, VEC1MAXDEPTH);
+                    if (v1 == HUFF_VEC1_SIZE - 1)
+                        v1 += ff_wma_get_large_val(&s->gb);
+                    f0 = v0;
+                    f1 = v1;
+                    memcpy(&vals[i],     &f0, sizeof(f0));
+                    memcpy(&vals[i + 1], &f1, sizeof(f1));
+                } else {
+                    vals[i]   = fval_tab[symbol_to_vec2[idx] >> 4 ];
+                    vals[i+1] = fval_tab[symbol_to_vec2[idx] & 0xF];
+                }
+            }
+        } else {
+            vals[0] = fval_tab[ symbol_to_vec4[idx] >> 12      ];
+            vals[1] = fval_tab[(symbol_to_vec4[idx] >> 8) & 0xF];
+            vals[2] = fval_tab[(symbol_to_vec4[idx] >> 4) & 0xF];
+            vals[3] = fval_tab[ symbol_to_vec4[idx]       & 0xF];
+        }
+        /** decode sign */
+        for (i = 0; i < 4; i++) {
+            if (vals[i]) {
+                /* a cleared sign bit in the stream marks a negative value */
+                const uint32_t sign_bit = get_bits1(&s->gb) ? 0 : 0x80000000u;
+                const uint32_t bits     = vals[i] ^ sign_bit;
+                memcpy(&ci->coeffs[cur_coeff], &bits, sizeof(bits));
+                num_zeros = 0;
+            } else {
+                ci->coeffs[cur_coeff] = 0;
+                /** switch to run level mode when more than
+                    subframe_len >> 8 zeros were found in a row */
+                rl_mode |= (++num_zeros > s->subframe_len >> 8);
+            }
+            ++cur_coeff;
+        }
+    }
+    /** decode run level coded coefficients */
+    if (rl_mode) {
+        /* the run level decoder only writes the non-zero positions */
+        memset(&ci->coeffs[cur_coeff], 0,
+               sizeof(*ci->coeffs) * (s->subframe_len - cur_coeff));
+        if (ff_wma_run_level_decode(s->avctx, &s->gb, vlc,
+                                    level, run, 1, ci->coeffs,
+                                    cur_coeff, s->subframe_len,
+                                    s->subframe_len, s->esc_len, 0))
+            return AVERROR_INVALIDDATA;
+    }
+    return 0;
+}
+/**
+ *@brief Extract scale factors from the bitstream.
+ *
+ * For every channel of the current subframe the scale factors are either
+ * resampled from the last transmitted set, freshly decoded (DPCM), or
+ * resampled and then corrected by run level coded differences.
+ *@param s codec context
+ *@return 0 on success, < 0 in case of bitstream errors
+ */
+static int decode_scale_factors(WMAProDecodeCtx* s)
+{
+    int n;
+    /** should never consume more than 5344 bits
+     *  MAX_CHANNELS * (1 +  MAX_BANDS * 23)
+     */
+    for (n = 0; n < s->channels_for_cur_subframe; n++) {
+        WMAProChannelCtx* ch = &s->channel[s->channel_indexes_for_cur_subframe[n]];
+        int* sf;
+        int* sf_end;
+        int band;
+        /** work in the buffer that does not hold the last transmitted set */
+        ch->scale_factors = ch->saved_scale_factors[!ch->scale_factor_idx];
+        sf_end = ch->scale_factors + s->num_bands;
+        /** resample scale factors for the new block size
+         *  the factors may have to be resampled several times before new
+         *  values arrive, so the last transmitted set is kept untouched in
+         *  saved_scale_factors
+         */
+        if (ch->reuse_sf) {
+            const int8_t* sf_offsets = s->sf_offsets[s->table_idx][ch->table_idx];
+            for (band = 0; band < s->num_bands; band++)
+                ch->scale_factors[band] =
+                    ch->saved_scale_factors[ch->scale_factor_idx][sf_offsets[band]];
+        }
+        /** the first subframe always carries scale factors, later ones
+            signal them with a flag */
+        if (!ch->cur_subframe || get_bits1(&s->gb)) {
+            if (ch->reuse_sf) {
+                /** run level decode differences to the resampled factors */
+                for (band = 0; band < s->num_bands; band++) {
+                    int skip;
+                    int val;
+                    int sign;
+                    const int idx = get_vlc2(&s->gb, sf_rl_vlc.table, VLCBITS,
+                                             SCALERLMAXDEPTH);
+                    /** symbol 1 terminates the list */
+                    if (idx == 1)
+                        break;
+                    if (!idx) {
+                        /** escape: level, run and sign packed in 14 bits */
+                        uint32_t code = get_bits(&s->gb, 14);
+                        val  =  code >> 6;
+                        sign = (code & 1) - 1;
+                        skip = (code & 0x3f) >> 1;
+                    } else {
+                        skip = scale_rl_run[idx];
+                        val  = scale_rl_level[idx];
+                        sign = get_bits1(&s->gb)-1;
+                    }
+                    band += skip;
+                    if (band >= s->num_bands) {
+                        av_log(s->avctx, AV_LOG_ERROR,
+                               "invalid scale factor coding\n");
+                        return AVERROR_INVALIDDATA;
+                    }
+                    /** sign is 0 or -1: (val ^ sign) - sign negates val for -1 */
+                    ch->scale_factors[band] += (val ^ sign) - sign;
+                }
+            } else {
+                int val;
+                /** decode DPCM coded scale factors */
+                ch->scale_factor_step = get_bits(&s->gb, 2) + 1;
+                val = 45 / ch->scale_factor_step;
+                for (band = 0; band < s->num_bands; band++) {
+                    val += get_vlc2(&s->gb, sf_vlc.table, SCALEVLCBITS, SCALEMAXDEPTH) - 60;
+                    ch->scale_factors[band] = val;
+                }
+            }
+            /** swap buffers */
+            ch->scale_factor_idx = !ch->scale_factor_idx;
+            ch->table_idx = s->table_idx;
+            ch->reuse_sf  = 1;
+        }
+        /** calculate new scale factor maximum */
+        ch->max_scale_factor = ch->scale_factors[0];
+        for (sf = ch->scale_factors + 1; sf < sf_end; sf++)
+            ch->max_scale_factor = FFMAX(ch->max_scale_factor, *sf);
+    }
+    return 0;
+}
+/**
+ *@brief Reconstruct the individual channel data.
+ *
+ * For every channel group with an active transform the coefficients of each
+ * scale factor band are either multiplied with the group's decorrelation
+ * matrix or, for stereo streams, scaled by 181.0 / 128.
+ *@param s codec context
+ */
+static void inverse_channel_transform(WMAProDecodeCtx *s)
+{
+    int g;
+    for (g = 0; g < s->num_chgroups; g++) {
+        WMAProChannelGrp* grp = &s->chgroup[g];
+        float in[WMAPRO_MAX_CHANNELS];
+        int num_channels;
+        int band;
+        if (!grp->transform)
+            continue;
+        num_channels = grp->num_channels;
+        /** multichannel decorrelation */
+        for (band = 0; band < s->num_bands; band++) {
+            const int16_t* sfb = s->cur_sfb_offsets + band;
+            if (grp->transform_band[band] == 1) {
+                /** multiply values with the decorrelation_matrix */
+                const int last = FFMIN(sfb[1], s->subframe_len);
+                int y;
+                for (y = sfb[0]; y < last; y++) {
+                    /** one matrix row per output channel */
+                    const float* row = grp->decorrelation_matrix;
+                    int src;
+                    int dst;
+                    /** take a snapshot of the inputs, the outputs are
+                        written back to the same position */
+                    for (src = 0; src < num_channels; src++)
+                        in[src] = grp->channel_data[src][y];
+                    for (dst = 0; dst < num_channels; dst++) {
+                        float sum = 0;
+                        for (src = 0; src < num_channels; src++)
+                            sum += in[src] * row[src];
+                        grp->channel_data[dst][y] = sum;
+                        row += num_channels;
+                    }
+                }
+            } else if (s->num_channels == 2) {
+                int len = FFMIN(sfb[1], s->subframe_len) - sfb[0];
+                int k;
+                for (k = 0; k < 2; k++)
+                    s->dsp.vector_fmul_scalar(grp->channel_data[k] + sfb[0],
+                                              grp->channel_data[k] + sfb[0],
+                                              181.0 / 128, len);
+            }
+        }
+    }
+}
+/**
+ *@brief Apply sine window and reconstruct the output buffer.
+ *
+ * The window length is the shorter of the previous and the current block
+ * length; the windowed region is centred on the start of the current
+ * subframe's coefficients.
+ *@param s codec context
+ */
+static void wmapro_window(WMAProDecodeCtx *s)
+{
+    int n;
+    for (n = 0; n < s->channels_for_cur_subframe; n++) {
+        WMAProChannelCtx* ch = &s->channel[s->channel_indexes_for_cur_subframe[n]];
+        const int prev_len = ch->prev_block_len;
+        int winlen = prev_len;
+        int half;
+        float* window;
+        /** begin half a previous block ahead of the new coefficients */
+        float* start = ch->coeffs - (prev_len >> 1);
+        if (s->subframe_len < prev_len) {
+            /** current block is shorter: shrink the overlap around its centre */
+            start += (prev_len - s->subframe_len) >> 1;
+            winlen = s->subframe_len;
+        }
+        window = s->windows[av_log2(winlen) - BLOCK_MIN_BITS];
+        half = winlen >> 1;
+        s->dsp.vector_fmul_window(start, start, start + half,
+                                  window, 0, half);
+        ch->prev_block_len = s->subframe_len;
+    }
+}
+/**
+ *@brief Decode a single subframe (block).
+ *
+ * Picks the next block to decode (the one belonging to the channel(s) with
+ * the fewest decoded samples), parses the subframe header, channel
+ * transform, quantization step and scale factors, reads the coefficients and
+ * finally runs inverse transform, rescaling, IMDCT and windowing.
+ * Sets s->parsed_all_subframes once this block completes the frame.
+ *@param s codec context
+ *@return 0 on success, < 0 when decoding failed
+ */
+static int decode_subframe(WMAProDecodeCtx *s)
+{
+    int offset = s->samples_per_frame;
+    int subframe_len = s->samples_per_frame;
+    int i;
+    /** samples of the frame that are still undecoded, summed over all channels */
+    int total_samples   = s->samples_per_frame * s->num_channels;
+    int transmit_coeffs = 0;
+    int cur_subwoofer_cutoff;
+    /** remember where the subframe starts (only used for the debug output) */
+    s->subframe_offset = get_bits_count(&s->gb);
+    /** reset channel context and find the next block offset and size
+        == the next block of the channel with the smallest number of
+        decoded samples
+    */
+    for (i = 0; i < s->num_channels; i++) {
+        s->channel[i].grouped = 0;
+        if (offset > s->channel[i].decoded_samples) {
+            offset = s->channel[i].decoded_samples;
+            subframe_len =
+                s->channel[i].subframe_len[s->channel[i].cur_subframe];
+        }
+    }
+    dprintf(s->avctx,
+            "processing subframe with offset %i len %i\n", offset, subframe_len);
+    /** get a list of all channels that contain the estimated block */
+    s->channels_for_cur_subframe = 0;
+    for (i = 0; i < s->num_channels; i++) {
+        const int cur_subframe = s->channel[i].cur_subframe;
+        /** subtract already processed samples */
+        total_samples -= s->channel[i].decoded_samples;
+        /** and count if there are multiple subframes that match our profile */
+        if (offset == s->channel[i].decoded_samples &&
+            subframe_len == s->channel[i].subframe_len[cur_subframe]) {
+            total_samples -= s->channel[i].subframe_len[cur_subframe];
+            s->channel[i].decoded_samples +=
+                s->channel[i].subframe_len[cur_subframe];
+            s->channel_indexes_for_cur_subframe[s->channels_for_cur_subframe] = i;
+            ++s->channels_for_cur_subframe;
+        }
+    }
+    /** check if the frame will be complete after processing the
+        estimated block */
+    if (!total_samples)
+        s->parsed_all_subframes = 1;
+    dprintf(s->avctx, "subframe is part of %i channels\n",
+            s->channels_for_cur_subframe);
+    /** calculate number of scale factor bands and their offsets
+        (the tables are indexed by log2 of the frame / subframe length ratio) */
+    s->table_idx         = av_log2(s->samples_per_frame/subframe_len);
+    s->num_bands         = s->num_sfb[s->table_idx];
+    s->cur_sfb_offsets   = s->sfb_offsets[s->table_idx];
+    cur_subwoofer_cutoff = s->subwoofer_cutoffs[s->table_idx];
+    /** configure the decoder for the current subframe:
+        the coefficients are stored in the output buffer, half a frame
+        plus the block offset from its start */
+    for (i = 0; i < s->channels_for_cur_subframe; i++) {
+        int c = s->channel_indexes_for_cur_subframe[i];
+        s->channel[c].coeffs = &s->channel[c].out[(s->samples_per_frame >> 1)
+                                                  + offset];
+    }
+    s->subframe_len = subframe_len;
+    s->esc_len = av_log2(s->subframe_len - 1) + 1;
+    /** skip extended header if any */
+    if (get_bits1(&s->gb)) {
+        int num_fill_bits;
+        /** a 2 bit count of 0 means the count is coded explicitly:
+            4 bits give the field width, the field holds count - 1 */
+        if (!(num_fill_bits = get_bits(&s->gb, 2))) {
+            int len = get_bits(&s->gb, 4);
+            num_fill_bits = get_bits(&s->gb, len) + 1;
+        }
+        if (num_fill_bits >= 0) {
+            /** never skip past the end of the saved frame data */
+            if (get_bits_count(&s->gb) + num_fill_bits > s->num_saved_bits) {
+                av_log(s->avctx, AV_LOG_ERROR, "invalid number of fill bits\n");
+                return AVERROR_INVALIDDATA;
+            }
+            skip_bits_long(&s->gb, num_fill_bits);
+        }
+    }
+    /** no idea for what the following bit is used */
+    if (get_bits1(&s->gb)) {
+        av_log_ask_for_sample(s->avctx, "reserved bit set\n");
+        return AVERROR_INVALIDDATA;
+    }
+    if (decode_channel_transform(s) < 0)
+        return AVERROR_INVALIDDATA;
+    /** one flag per channel tells whether coefficients are transmitted */
+    for (i = 0; i < s->channels_for_cur_subframe; i++) {
+        int c = s->channel_indexes_for_cur_subframe[i];
+        if ((s->channel[c].transmit_coefs = get_bits1(&s->gb)))
+            transmit_coeffs = 1;
+    }
+    if (transmit_coeffs) {
+        int step;
+        /** base quantization step derived from the sample size */
+        int quant_step = 90 * s->bits_per_sample >> 4;
+        if ((get_bits1(&s->gb))) {
+            /** FIXME: might change run level mode decision */
+            av_log_ask_for_sample(s->avctx, "unsupported quant step coding\n");
+            return AVERROR_INVALIDDATA;
+        }
+        /** decode quantization step */
+        step = get_sbits(&s->gb, 6);
+        quant_step += step;
+        /** the extreme values -32 and 31 are escapes: further 5 bit chunks
+            follow, each chunk equal to 31 adds 31 and continues, and the
+            total is applied with the sign of the escape */
+        if (step == -32 || step == 31) {
+            const int sign = (step == 31) - 1;
+            int quant = 0;
+            while (get_bits_count(&s->gb) + 5 < s->num_saved_bits &&
+                   (step = get_bits(&s->gb, 5)) == 31) {
+                quant += 31;
+            }
+            quant_step += ((quant + step) ^ sign) - sign;
+        }
+        /** a negative step is only logged, decoding continues */
+        if (quant_step < 0) {
+            av_log(s->avctx, AV_LOG_DEBUG, "negative quant step\n");
+        }
+        /** decode quantization step modifiers for every channel */
+        if (s->channels_for_cur_subframe == 1) {
+            s->channel[s->channel_indexes_for_cur_subframe[0]].quant_step = quant_step;
+        } else {
+            int modifier_len = get_bits(&s->gb, 3);
+            for (i = 0; i < s->channels_for_cur_subframe; i++) {
+                int c = s->channel_indexes_for_cur_subframe[i];
+                s->channel[c].quant_step = quant_step;
+                /** optional per channel increase: modifier + 1, or just 1
+                    when the modifier has zero length */
+                if (get_bits1(&s->gb)) {
+                    if (modifier_len) {
+                        s->channel[c].quant_step += get_bits(&s->gb, modifier_len) + 1;
+                    } else
+                        ++s->channel[c].quant_step;
+                }
+            }
+        }
+        /** decode scale factors */
+        if (decode_scale_factors(s) < 0)
+            return AVERROR_INVALIDDATA;
+    }
+    dprintf(s->avctx, "BITSTREAM: subframe header length was %i\n",
+            get_bits_count(&s->gb) - s->subframe_offset);
+    /** parse coefficients; channels without transmitted coefficients (or
+        with no input bits left) are cleared.
+        NOTE(review): the return value of decode_coeffs() is ignored here */
+    for (i = 0; i < s->channels_for_cur_subframe; i++) {
+        int c = s->channel_indexes_for_cur_subframe[i];
+        if (s->channel[c].transmit_coefs &&
+            get_bits_count(&s->gb) < s->num_saved_bits) {
+            decode_coeffs(s, c);
+        } else
+            memset(s->channel[c].coeffs, 0,
+                   sizeof(*s->channel[c].coeffs) * subframe_len);
+    }
+    dprintf(s->avctx, "BITSTREAM: subframe length was %i\n",
+            get_bits_count(&s->gb) - s->subframe_offset);
+    if (transmit_coeffs) {
+        /** reconstruct the per channel data */
+        inverse_channel_transform(s);
+        for (i = 0; i < s->channels_for_cur_subframe; i++) {
+            int c = s->channel_indexes_for_cur_subframe[i];
+            const int* sf = s->channel[c].scale_factors;
+            int b;
+            /** clear the LFE channel above the subwoofer cutoff.
+                NOTE(review): the rescaling loop below writes s->tmp for
+                every band afterwards, so this clearing presumably gets
+                overwritten - confirm the intended order */
+            if (c == s->lfe_channel)
+                memset(&s->tmp[cur_subwoofer_cutoff], 0, sizeof(*s->tmp) *
+                       (subframe_len - cur_subwoofer_cutoff));
+            /** inverse quantization and rescaling:
+                gain per band is 10^(exp / 20) with
+                exp = quant_step - (max_scale_factor - sf) * scale_factor_step */
+            for (b = 0; b < s->num_bands; b++) {
+                const int end = FFMIN(s->cur_sfb_offsets[b+1], s->subframe_len);
+                const int exp = s->channel[c].quant_step -
+                            (s->channel[c].max_scale_factor - *sf++) *
+                            s->channel[c].scale_factor_step;
+                const float quant = pow(10.0, exp / 20.0);
+                int start = s->cur_sfb_offsets[b];
+                s->dsp.vector_fmul_scalar(s->tmp + start,
+                                          s->channel[c].coeffs + start,
+                                          quant, end - start);
+            }
+            /** apply imdct (ff_imdct_half == DCTIV with reverse) */
+            ff_imdct_half(&s->mdct_ctx[av_log2(subframe_len) - BLOCK_MIN_BITS],
+                          s->channel[c].coeffs, s->tmp);
+        }
+    }
+    /** window and overlap-add */
+    wmapro_window(s);
+    /** handled one subframe */
+    for (i = 0; i < s->channels_for_cur_subframe; i++) {
+        int c = s->channel_indexes_for_cur_subframe[i];
+        if (s->channel[c].cur_subframe >= s->channel[c].num_subframes) {
+            av_log(s->avctx, AV_LOG_ERROR, "broken subframe\n");
+            return AVERROR_INVALIDDATA;
+        }
+        ++s->channel[c].cur_subframe;
+    }
+    return 0;
+}
+/**
+ *@brief Decode one WMA frame.
+ *
+ * Every failure (no room in the output buffer, broken tile header,
+ * unsupported postproc transform, subframe error, frame length mismatch)
+ * flags s->packet_loss and returns 0.
+ *@param s codec context
+ *@return 0 if the trailer bit indicates that this is the last frame,
+ *        1 if there are additional frames
+ */
+static int decode_frame(WMAProDecodeCtx *s)
+{
+    GetBitContext* gb = &s->gb;
+    int more_frames;
+    int frame_bits;
+    int len = 0;
+    int ch;
+    /** check for potential output buffer overflow */
+    if (s->num_channels * s->samples_per_frame > s->samples_end - s->samples) {
+        /** return an error if no frame could be decoded at all */
+        av_log(s->avctx, AV_LOG_ERROR,
+               "not enough space for the output samples\n");
+        goto lost;
+    }
+    /** get frame length (only transmitted with a length prefix) */
+    if (s->len_prefix)
+        len = get_bits(gb, s->log2_frame_size);
+    dprintf(s->avctx, "decoding frame with length %x\n", len);
+    /** decode tile information */
+    if (decode_tilehdr(s))
+        goto lost;
+    /** read postproc transform */
+    if (s->num_channels > 1 && get_bits1(gb)) {
+        av_log_ask_for_sample(s->avctx, "Unsupported postproc transform found\n");
+        goto lost;
+    }
+    /** read drc info */
+    if (s->dynamic_range_compression) {
+        s->drc_gain = get_bits(gb, 8);
+        dprintf(s->avctx, "drc_gain %i\n", s->drc_gain);
+    }
+    /** no idea what these are for, might be the number of samples
+        that need to be skipped at the beginning or end of a stream */
+    if (get_bits1(gb)) {
+        int num_skip;
+        /** usually true for the first frame */
+        if (get_bits1(gb)) {
+            num_skip = get_bits(gb, av_log2(s->samples_per_frame * 2));
+            dprintf(s->avctx, "start skip: %i\n", num_skip);
+        }
+        /** sometimes true for the last frame */
+        if (get_bits1(gb)) {
+            num_skip = get_bits(gb, av_log2(s->samples_per_frame * 2));
+            dprintf(s->avctx, "end skip: %i\n", num_skip);
+        }
+    }
+    dprintf(s->avctx, "BITSTREAM: frame header length was %i\n",
+            get_bits_count(gb) - s->frame_offset);
+    /** reset subframe states */
+    s->parsed_all_subframes = 0;
+    for (ch = 0; ch < s->num_channels; ch++) {
+        WMAProChannelCtx* chan = &s->channel[ch];
+        chan->decoded_samples = 0;
+        chan->cur_subframe    = 0;
+        chan->reuse_sf        = 0;
+    }
+    /** decode all subframes (at least one, the flag was just cleared) */
+    do {
+        if (decode_subframe(s) < 0)
+            goto lost;
+    } while (!s->parsed_all_subframes);
+    /** interleave samples and write them to the output buffer */
+    for (ch = 0; ch < s->num_channels; ch++) {
+        float* const out = s->channel[ch].out;
+        int n;
+        for (n = 0; n < s->samples_per_frame; n++)
+            s->samples[n * s->num_channels + ch] =
+                av_clipf(out[n], -1.0, 32767.0 / 32768.0);
+        /** reuse second half of the IMDCT output for the next frame */
+        memcpy(out, out + s->samples_per_frame,
+               s->samples_per_frame * sizeof(*out) >> 1);
+    }
+    /** a frame marked to be skipped does not advance the output pointer */
+    if (s->skip_frame)
+        s->skip_frame = 0;
+    else
+        s->samples += s->num_channels * s->samples_per_frame;
+    /** bits consumed since the start of the frame */
+    frame_bits = get_bits_count(gb) - s->frame_offset;
+    if (len != frame_bits + 2) {
+        /** FIXME: not sure if this is always an error */
+        av_log(s->avctx, AV_LOG_ERROR, "frame[%i] would have to skip %i bits\n",
+               s->frame_num, len - frame_bits - 1);
+        goto lost;
+    }
+    /** skip the rest of the frame data */
+    skip_bits_long(gb, len - frame_bits - 1);
+    /** decode trailer bit */
+    more_frames = get_bits1(gb);
+    ++s->frame_num;
+    return more_frames;
+lost:
+    /** shared exit for every failed decoding step */
+    s->packet_loss = 1;
+    return 0;
+}
+/**
+ *@brief Calculate remaining input buffer length.
+ *
+ * The result is the size of the current input buffer in bits
+ * (s->buf_bit_size) minus the bits the given reader has consumed.
+ *@param s codec context
+ *@param gb bitstream reader context
+ *@return remaining size in bits
+ */
+static int remaining_bits(WMAProDecodeCtx *s, GetBitContext *gb)
+{
+    const int consumed = get_bits_count(gb);
+    const int left = s->buf_bit_size - consumed;
+    return left;
+}
+/**
+ *@brief Fill the bit reservoir with a (partial) frame.
+ *
+ * Copies len bits from gb into s->frame_data, advances gb past them and
+ * re-initializes s->gb so that it reads the accumulated frame data.
+ * On invalid lengths s->packet_loss is set and nothing is copied.
+ *@param s codec context
+ *@param gb bitstream reader context
+ *@param len length of the partial frame
+ *@param append decides whether to reset the buffer or not
+ */
+static void save_bits(WMAProDecodeCtx *s, GetBitContext* gb, int len,
+                      int append)
+{
+    int buflen;
+    /** when the frame data does not need to be concatenated, the input buffer
+        is reset and additional bits from the previous frame are copied
+        and skipped later so that a fast byte copy is possible */
+    if (!append) {
+        /** bit position of the reader inside its current byte */
+        s->frame_offset = get_bits_count(gb) & 7;
+        s->num_saved_bits = s->frame_offset;
+        init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE);
+    }
+    /** size in bytes of the reservoir once len more bits are stored */
+    buflen = (s->num_saved_bits + len + 8) >> 3;
+    if (len <= 0 || buflen > MAX_FRAMESIZE) {
+        av_log_ask_for_sample(s->avctx, "input buffer too small\n");
+        s->packet_loss = 1;
+        return;
+    }
+    s->num_saved_bits += len;
+    if (!append) {
+        /** copy from the start of the current byte, i.e. including the
+            frame_offset leading bits that are skipped again below */
+        ff_copy_bits(&s->pb, gb->buffer + (get_bits_count(gb) >> 3),
+                     s->num_saved_bits);
+    } else {
+        /** first move the bits up to the next byte boundary of the source
+            (8 bits when it is already aligned, never more than len) ... */
+        int align = 8 - (get_bits_count(gb) & 7);
+        align = FFMIN(align, len);
+        put_bits(&s->pb, align, get_bits(gb, align));
+        len -= align;
+        /** ... then copy the rest starting at a byte aligned position */
+        ff_copy_bits(&s->pb, gb->buffer + (get_bits_count(gb) >> 3), len);
+    }
+    /** advance the source reader (in the append case the first align bits
+        were already consumed by get_bits above) */
+    skip_bits_long(gb, len);
+    {
+        /** flush a copy of the writer, s->pb itself is left as it is
+            (presumably so that more bits can still be appended later) */
+        PutBitContext tmp = s->pb;
+        flush_put_bits(&tmp);
+    }
+    /** restart the frame reader on the accumulated data and skip the
+        leading bits that do not belong to the frame */
+    init_get_bits(&s->gb, s->frame_data, s->num_saved_bits);
+    skip_bits(&s->gb, s->frame_offset);
+}
+/**
+ *@brief Decode a single WMA packet.
+ *
+ * Works as a small state machine across calls: when the previous packet is
+ * done (or was lost) the header of a new packet is parsed and the frame that
+ * spans both packets is completed; otherwise the next frame inside the
+ * current packet is decoded. Leftover bits of a finished packet are saved
+ * for the next call.
+ *@param avctx codec context
+ *@param data the output buffer
+ *@param data_size on entry the size of the output buffer in bytes, on return
+ *       the number of bytes that were written to the output buffer
+ *@param avpkt input packet
+ *@return number of bytes that were read from the input buffer,
+ *        AVERROR_INVALIDDATA when s->packet_loss is set on exit
+ */
+static int decode_packet(AVCodecContext *avctx,
+                         void *data, int *data_size, AVPacket* avpkt)
+{
+    WMAProDecodeCtx *s = avctx->priv_data;
+    GetBitContext* gb  = &s->pgb;
+    const uint8_t* buf = avpkt->data;
+    int buf_size       = avpkt->size;
+    int num_bits_prev_frame;
+    int packet_sequence_number;
+    /** output window for decode_frame(): [samples, samples_end) */
+    s->samples       = data;
+    s->samples_end   = (float*)((int8_t*)data + *data_size);
+    *data_size = 0;
+    if (s->packet_done || s->packet_loss) {
+        s->packet_done = 0;
+        /** NOTE(review): the bit size is taken from the full packet size,
+            before buf_size is limited to block_align below */
+        s->buf_bit_size = buf_size << 3;
+        /** sanity check for the buffer length */
+        if (buf_size < avctx->block_align)
+            return 0;
+        buf_size = avctx->block_align;
+        /** parse packet header: 4 bit sequence number, 2 skipped bits */
+        init_get_bits(gb, buf, s->buf_bit_size);
+        packet_sequence_number = get_bits(gb, 4);
+        skip_bits(gb, 2);
+        /** get number of bits that need to be added to the previous frame */
+        num_bits_prev_frame = get_bits(gb, s->log2_frame_size);
+        dprintf(avctx, "packet[%d]: nbpf %x\n", avctx->frame_number,
+                num_bits_prev_frame);
+        /** check for packet loss (the sequence number wraps at 16) */
+        if (!s->packet_loss &&
+            ((s->packet_sequence_number + 1) & 0xF) != packet_sequence_number) {
+            s->packet_loss = 1;
+            av_log(avctx, AV_LOG_ERROR, "Packet loss detected! seq %x vs %x\n",
+                   s->packet_sequence_number, packet_sequence_number);
+        }
+        s->packet_sequence_number = packet_sequence_number;
+        if (num_bits_prev_frame > 0) {
+            /** append the previous frame data to the remaining data from the
+                previous packet to create a full frame */
+            save_bits(s, gb, num_bits_prev_frame, 1);
+            dprintf(avctx, "accumulated %x bits of frame data\n",
+                    s->num_saved_bits - s->frame_offset);
+            /** decode the cross packet frame if it is valid */
+            if (!s->packet_loss)
+                decode_frame(s);
+        } else if (s->num_saved_bits - s->frame_offset) {
+            dprintf(avctx, "ignoring %x previously saved bits\n",
+                    s->num_saved_bits - s->frame_offset);
+        }
+        /** the loss state is cleared again once the new packet header
+            was handled */
+        s->packet_loss = 0;
+    } else {
+        int frame_size;
+        /** continue inside the current packet at the saved bit offset */
+        s->buf_bit_size = avpkt->size << 3;
+        init_get_bits(gb, avpkt->data, s->buf_bit_size);
+        skip_bits(gb, s->packet_offset);
+        /** decode the next frame only if its (non zero) length field and
+            the whole frame fit into the remaining packet data */
+        if (remaining_bits(s, gb) > s->log2_frame_size &&
+            (frame_size = show_bits(gb, s->log2_frame_size)) &&
+            frame_size <= remaining_bits(s, gb)) {
+            save_bits(s, gb, frame_size, 0);
+            s->packet_done = !decode_frame(s);
+        } else
+            s->packet_done = 1;
+    }
+    if (s->packet_done && !s->packet_loss &&
+        remaining_bits(s, gb) > 0) {
+        /** save the rest of the data so that it can be decoded
+            with the next packet */
+        save_bits(s, gb, remaining_bits(s, gb), 0);
+    }
+    /** s->samples was advanced by decode_frame() for every written frame */
+    *data_size = (int8_t *)s->samples - (int8_t *)data;
+    /** keep the sub-byte read position for the next call */
+    s->packet_offset = get_bits_count(gb) & 7;
+    return (s->packet_loss) ? AVERROR_INVALIDDATA : get_bits_count(gb) >> 3;
+}
+/**
+ *@brief Clear decoder buffers (for seeking).
+ *
+ * Zeroes the first samples_per_frame samples of every channel's output
+ * buffer and flags packet loss.
+ *@param avctx codec context
+ */
+static void flush(AVCodecContext *avctx)
+{
+    WMAProDecodeCtx *s = avctx->priv_data;
+    int ch = s->num_channels;
+    /** reset output buffer as a part of it is used during the windowing of a
+        new frame (the channels are independent, so the order is irrelevant) */
+    while (ch-- > 0) {
+        memset(s->channel[ch].out, 0,
+               s->samples_per_frame * sizeof(*s->channel[ch].out));
+    }
+    s->packet_loss = 1;
+}
+#if 0
+/**
+ *@brief wmapro decoder
+ *
+ * Codec description that ties the decoder entry points (decode_init,
+ * decode_end, decode_packet, flush) to the codec id. The whole definition
+ * is excluded from compilation by the surrounding "#if 0".
+ */
+AVCodec wmapro_decoder = {
+    "wmapro",
+    AVMEDIA_TYPE_AUDIO,
+    CODEC_ID_WMAPRO,
+    sizeof(WMAProDecodeCtx),
+    decode_init,
+    NULL,
+    decode_end,
+    decode_packet,
+    .capabilities = CODEC_CAP_SUBFRAMES,
+    .flush= flush,
+    .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio 9 Professional"),
+};
+#endif
+/**
+ *@brief Placeholder program entry point.
+ *
+ * Possible test program - just here now to silence the linker.
+ *@return always 0
+ */
+int main(void)
+{
+    const int exit_status = 0;
+    return exit_status;
+}
author	Mohamed Tarek <mt@rockbox.org>	2010-04-27 18:04:34 +0000
committer	Mohamed Tarek <mt@rockbox.org>	2010-04-27 18:04:34 +0000
commit	dda7fab1d65e73a6bdbdac1b1d37330b8f0085aa (patch)
tree	6446710d45643347d1f301bbfcb914e3ec4a39e4
parent	f96406c5f3aca04f3c0ab95275d857288fd8c2ea (diff)
download	rockbox-dda7fab1d65e73a6bdbdac1b1d37330b8f0085aa.tar.gz rockbox-dda7fab1d65e73a6bdbdac1b1d37330b8f0085aa.zip