From 6039eb05ba6d82ef56f2868c96654c552d117bf9 Mon Sep 17 00:00:00 2001
From: Franklin Wei <git@fwei.tk>
Date: Wed, 7 Feb 2018 20:04:46 -0500
Subject: sdl: remove non-rockbox drivers

We never use any of these other drivers, so having them around just takes
up space.

Change-Id: Iced812162df1fef3fd55522b7e700acb6c3bcd41
---
 .../sdl/src/video/ps3/spulibs/bilin_scaler.c       | 2050 --------------------
 1 file changed, 2050 deletions(-)
 delete mode 100644 apps/plugins/sdl/src/video/ps3/spulibs/bilin_scaler.c

(limited to 'apps/plugins/sdl/src/video/ps3/spulibs/bilin_scaler.c')
diff --git a/apps/plugins/sdl/src/video/ps3/spulibs/bilin_scaler.c b/apps/plugins/sdl/src/video/ps3/spulibs/bilin_scaler.c
deleted file mode 100644
index be9b5c6e8d..0000000000
--- a/apps/plugins/sdl/src/video/ps3/spulibs/bilin_scaler.c
+++ /dev/null
@@ -1,2050 +0,0 @@
-/*
- * SDL - Simple DirectMedia Layer
- * CELL BE Support for PS3 Framebuffer
- * Copyright (C) 2008, 2009 International Business Machines Corporation
- *
- * This library is free software; you can redistribute it and/or modify it
- * under the terms of the GNU Lesser General Public License as published
- * by the Free Software Foundation; either version 2.1 of the License, or
- * (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
- * USA
- *
- *  Martin Lowinski  <lowinski [at] de [dot] ibm [ibm] com>
- *  Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com>
- *  SPE code based on research by:
- *  Rene Becker
- *  Thimo Emmerich
- */
-
-#include "spu_common.h"
-
-#include <spu_intrinsics.h>
-#include <spu_mfcio.h>
-
-// Debugging
-//#define DEBUG
-
-#ifdef DEBUG
-#define deprintf(fmt, args... ) \
-	fprintf( stdout, fmt, ##args ); \
-	fflush( stdout );
-#else
-#define deprintf( fmt, args... )
-#endif
-
-struct scale_parms_t parms __attribute__((aligned(128)));
-
-/* A maximum of 8 lines Y, therefore 4 lines V, 4 lines U are stored
- * there might be the need to retrieve misaligned data, adjust
- * incoming v and u plane to be able to handle this (add 128)
- */
-unsigned char y_plane[2][(MAX_HDTV_WIDTH+128)*4] __attribute__((aligned(128)));
-unsigned char v_plane[2][(MAX_HDTV_WIDTH+128)*2] __attribute__((aligned(128)));
-unsigned char u_plane[2][(MAX_HDTV_WIDTH+128)*2] __attribute__((aligned(128)));
-
-/* temp-buffer for scaling: 4 lines Y, therefore 2 lines V, 2 lines U */
-unsigned char scaled_y_plane[2][MAX_HDTV_WIDTH*2] __attribute__((aligned(128)));
-unsigned char scaled_v_plane[2][MAX_HDTV_WIDTH/2] __attribute__((aligned(128)));
-unsigned char scaled_u_plane[2][MAX_HDTV_WIDTH/2] __attribute__((aligned(128)));
-
-/* some vectors needed by the float to int conversion */
-static const vector float vec_255 = { 255.0f, 255.0f, 255.0f, 255.0f };
-static const vector float vec_0_1 = { 0.1f, 0.1f, 0.1f, 0.1f };
-
-void bilinear_scale_line_w8(unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride);
-void bilinear_scale_line_w16(unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride);
-
-void scale_srcw16_dstw16();
-void scale_srcw16_dstw32();
-void scale_srcw32_dstw16();
-void scale_srcw32_dstw32();
-
-int main( unsigned long long spe_id __attribute__((unused)), unsigned long long argp )
-{
-	deprintf("[SPU] bilin_scaler_spu is up... (on SPE #%llu)\n", spe_id);
-	/* DMA transfer for the input parameters */
-	spu_mfcdma32(&parms, (unsigned int)argp, sizeof(struct scale_parms_t), TAG_INIT, MFC_GET_CMD);
-	DMA_WAIT_TAG(TAG_INIT);
-
-	deprintf("[SPU] Scale %ux%u to %ux%u\n", parms.src_pixel_width, parms.src_pixel_height,
-			parms.dst_pixel_width, parms.dst_pixel_height);
-
-	if(parms.src_pixel_width & 0x1f) {
-		if(parms.dst_pixel_width & 0x1F) {
-			deprintf("[SPU] Using scale_srcw16_dstw16\n");
-			scale_srcw16_dstw16();
-		} else {
-			deprintf("[SPU] Using scale_srcw16_dstw32\n");
-			scale_srcw16_dstw32();
-		}
-	} else {
-		if(parms.dst_pixel_width & 0x1F) {
-			deprintf("[SPU] Using scale_srcw32_dstw16\n");
-			scale_srcw32_dstw16();
-		} else {
-			deprintf("[SPU] Using scale_srcw32_dstw32\n");
-			scale_srcw32_dstw32();
-		}
-	}
-	deprintf("[SPU] bilin_scaler_spu... done!\n");
-
-	return 0;
-}
-
-
-/*
- * vfloat_to_vuint()
- *
- * converts a float vector to an unsinged int vector using saturated
- * arithmetic
- *
- * @param vec_s float vector for conversion
- * @returns converted unsigned int vector
- */
-inline static vector unsigned int vfloat_to_vuint(vector float vec_s) {
-	vector unsigned int select_1 = spu_cmpgt(vec_0_1, vec_s);
-	vec_s = spu_sel(vec_s, vec_0_1, select_1);
-
-	vector unsigned int select_2 = spu_cmpgt(vec_s, vec_255);
-	vec_s = spu_sel(vec_s, vec_255, select_2);
-	return spu_convtu(vec_s,0);
-}
-
-
-/*
- * scale_srcw16_dstw16()
- *
- * processes an input image of width 16
- * scaling is done to a width 16
- * result stored in RAM
- */
-void scale_srcw16_dstw16() {
-	// extract parameters
-	unsigned char* dst_addr = (unsigned char *)parms.dstBuffer;
-
-	unsigned int src_width = parms.src_pixel_width;
-	unsigned int src_height = parms.src_pixel_height;
-	unsigned int dst_width = parms.dst_pixel_width;
-	unsigned int dst_height = parms.dst_pixel_height;
-
-	// YVU
-	unsigned int src_linestride_y = src_width;
-	unsigned int src_dbl_linestride_y = src_width<<1;
-	unsigned int src_linestride_vu = src_width>>1;
-	unsigned int src_dbl_linestride_vu = src_width;
-
-	// scaled YVU
-	unsigned int scaled_src_linestride_y = dst_width;
-
-	// ram addresses
-	unsigned char* src_addr_y = parms.y_plane;
-	unsigned char* src_addr_v = parms.v_plane;
-	unsigned char* src_addr_u = parms.u_plane;
-
-	// for handling misalignment, addresses are precalculated
-	unsigned char* precalc_src_addr_v = src_addr_v;
-	unsigned char* precalc_src_addr_u = src_addr_u;
-
-	unsigned int dst_picture_size = dst_width*dst_height;
-
-	// Sizes for destination
-	unsigned int dst_dbl_linestride_y = dst_width<<1;
-	unsigned int dst_dbl_linestride_vu = dst_width>>1;
-
-	// Perform address calculation for Y, V and U in main memory with dst_addr as base
-	unsigned char* dst_addr_main_memory_y = dst_addr;
-	unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size;
-	unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2);
-
-	// calculate scale factors
-	vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width );
-	float y_scale = (float)src_height/(float)dst_height;
-
-	// double buffered processing
-	// buffer switching
-	unsigned int curr_src_idx = 0;
-	unsigned int curr_dst_idx = 0;
-	unsigned int next_src_idx, next_dst_idx;
-
-	// 2 lines y as output, upper and lowerline
-	unsigned int curr_interpl_y_upper = 0;
-	unsigned int next_interpl_y_upper;
-	unsigned int curr_interpl_y_lower, next_interpl_y_lower;
-	// only 1 line v/u output, both planes have the same dimension
-	unsigned int curr_interpl_vu = 0;
-	unsigned int next_interpl_vu;
-
-	// weights, calculated in every loop iteration
-	vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f };
-	vector float vf_next_NSweight_y_upper;
-	vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower;
-	vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f };
-	vector float vf_next_NSweight_vu;
-
-	// line indices for the src picture
-	float curr_src_y_upper = 0.0f, next_src_y_upper;
-	float curr_src_y_lower, next_src_y_lower;
-	float curr_src_vu = 0.0f, next_src_vu;
-
-	// line indices for the dst picture
-	unsigned int dst_y=0, dst_vu=0;
-
-	// offset for the v and u plane to handle misalignement
-	unsigned int curr_lsoff_v = 0, next_lsoff_v;
-	unsigned int curr_lsoff_u = 0, next_lsoff_u;
-
-	// calculate lower line indices
-	curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale;
-	curr_interpl_y_lower = (unsigned int)curr_src_y_lower;
-	// lower line weight
-	vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower );
-
-
-	// start partially double buffered processing
-	// get initial data, 2 sets of y, 1 set v, 1 set u
-	mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 );
-	mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y,
-			(unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y),
-			src_dbl_linestride_y,
-			RETR_BUF,
-			0, 0 );
-	mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
-	mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
-
-	/* iteration loop
-	 * within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved
-	 * the scaled output is 2 lines y, 1 line v, 1 line u
-	 * the yuv2rgb-converted output is stored to RAM
-	 */
-	for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) {
-		dst_y = dst_vu<<1;
-
-		// calculate next indices
-		next_src_vu = ((float)dst_vu+1)*y_scale;
-		next_src_y_upper = ((float)dst_y+2)*y_scale;
-		next_src_y_lower = ((float)dst_y+3)*y_scale;
-
-		next_interpl_vu = (unsigned int) next_src_vu;
-		next_interpl_y_upper = (unsigned int) next_src_y_upper;
-		next_interpl_y_lower = (unsigned int) next_src_y_lower;
-
-		// calculate weight NORTH-SOUTH
-		vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu );
-		vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper );
-		vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower );
-
-		// get next lines
-		next_src_idx = curr_src_idx^1;
-		next_dst_idx = curr_dst_idx^1;
-
-		// 4 lines y
-		mfc_get( y_plane[next_src_idx],
-				(unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y),
-				src_dbl_linestride_y,
-				RETR_BUF+next_src_idx,
-				0, 0 );
-		mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y,
-				(unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y),
-				src_dbl_linestride_y,
-				RETR_BUF+next_src_idx,
-				0, 0 );
-
-		// 2 lines v
-		precalc_src_addr_v = src_addr_v+(next_interpl_vu*src_linestride_vu);
-		next_lsoff_v = ((unsigned int)precalc_src_addr_v)&0x0F;
-		mfc_get( v_plane[next_src_idx],
-				((unsigned int) precalc_src_addr_v)&0xFFFFFFF0,
-				src_dbl_linestride_vu+(next_lsoff_v<<1),
-				RETR_BUF+next_src_idx,
-				0, 0 );
-		// 2 lines u
-		precalc_src_addr_u = src_addr_u+(next_interpl_vu*src_linestride_vu);
-		next_lsoff_u = ((unsigned int)precalc_src_addr_u)&0x0F;
-		mfc_get( u_plane[next_src_idx],
-				((unsigned int) precalc_src_addr_u)&0xFFFFFFF0,
-				src_dbl_linestride_vu+(next_lsoff_v<<1),
-				RETR_BUF+next_src_idx,
-				0, 0 );
-
-		DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
-
-		// scaling
-		// work line y_upper
-		bilinear_scale_line_w16( y_plane[curr_src_idx],
-				scaled_y_plane[curr_src_idx],
-				dst_width,
-				vf_x_scale,
-				vf_curr_NSweight_y_upper,
-				src_linestride_y );
-		// work line y_lower
-		bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
-				scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
-				dst_width,
-				vf_x_scale,
-				vf_curr_NSweight_y_lower,
-				src_linestride_y );
-		// work line v
-		bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v,
-				scaled_v_plane[curr_src_idx],
-				dst_width>>1,
-				vf_x_scale,
-				vf_curr_NSweight_vu,
-				src_linestride_vu );
-		// work line u
-		bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u,
-				scaled_u_plane[curr_src_idx],
-				dst_width>>1,
-				vf_x_scale,
-				vf_curr_NSweight_vu,
-				src_linestride_vu );
-
-
-		// Store the result back to main memory into a destination buffer in YUV format
-		//---------------------------------------------------------------------------------------------
-		DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
-
-		// Perform three DMA transfers to 3 different locations in the main memory!
-		// dst_width:	Pixel width of destination image
-		// dst_addr:	Destination address in main memory
-		// dst_vu:	Counter which is incremented one by one
-		// dst_y:	Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
-		mfc_put(	scaled_y_plane[curr_src_idx],					// What from local store (addr)
-				(unsigned int)dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y),	// Destination in main memory (addr)
-				dst_dbl_linestride_y,						// Two Y lines (depending on the widht of the destination resolution)
-				STR_BUF+curr_dst_idx,						// Tag
-				0, 0 );
-
-		mfc_put(	scaled_v_plane[curr_src_idx],					// What from local store (addr)
-				(unsigned int)dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
-				dst_dbl_linestride_vu,						// Two V lines (depending on the widht of the destination resolution)
-				STR_BUF+curr_dst_idx,						// Tag
-				0, 0 );
-
-		mfc_put(	scaled_u_plane[curr_src_idx],					// What from local store (addr)
-				(unsigned int)dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
-				dst_dbl_linestride_vu,						// Two U lines (depending on the widht of the destination resolution)
-				STR_BUF+curr_dst_idx,						// Tag
-				0, 0 );
-		//---------------------------------------------------------------------------------------------
-
-
-		// update for next cycle
-		curr_src_idx = next_src_idx;
-		curr_dst_idx = next_dst_idx;
-
-		curr_interpl_y_upper = next_interpl_y_upper;
-		curr_interpl_y_lower = next_interpl_y_lower;
-		curr_interpl_vu = next_interpl_vu;
-
-		vf_curr_NSweight_y_upper = vf_curr_NSweight_y_upper;
-		vf_curr_NSweight_y_lower = vf_curr_NSweight_y_lower;
-		vf_curr_NSweight_vu = vf_next_NSweight_vu;
-
-		curr_src_y_upper = next_src_y_upper;
-		curr_src_y_lower = next_src_y_lower;
-		curr_src_vu = next_src_vu;
-
-		curr_lsoff_v = next_lsoff_v;
-		curr_lsoff_u = next_lsoff_u;
-	}
-
-
-
-	DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
-
-	// scaling
-	// work line y_upper
-	bilinear_scale_line_w16( y_plane[curr_src_idx],
-			scaled_y_plane[curr_src_idx],
-			dst_width,
-			vf_x_scale,
-			vf_curr_NSweight_y_upper,
-			src_linestride_y );
-	// work line y_lower
-	bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
-			scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
-			dst_width,
-			vf_x_scale,
-			vf_curr_NSweight_y_lower,
-			src_linestride_y );
-	// work line v
-	bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v,
-			scaled_v_plane[curr_src_idx],
-			dst_width>>1,
-			vf_x_scale,
-			vf_curr_NSweight_vu,
-			src_linestride_vu );
-	// work line u
-	bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u,
-			scaled_u_plane[curr_src_idx],
-			dst_width>>1,
-			vf_x_scale,
-			vf_curr_NSweight_vu,
-			src_linestride_vu );
-
-
-	// Store the result back to main memory into a destination buffer in YUV format
-	//---------------------------------------------------------------------------------------------
-	DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
-
-	// Perform three DMA transfers to 3 different locations in the main memory!
-	// dst_width:	Pixel width of destination image
-	// dst_addr:	Destination address in main memory
-	// dst_vu:	Counter which is incremented one by one
-	// dst_y:	Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
-	mfc_put(	scaled_y_plane[curr_src_idx],					// What from local store (addr)
-			(unsigned int)dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y),	// Destination in main memory (addr)
-			dst_dbl_linestride_y,						// Two Y lines (depending on the widht of the destination resolution)
-			STR_BUF+curr_dst_idx,						// Tag
-			0, 0 );
-
-	mfc_put(	scaled_v_plane[curr_src_idx],					// What from local store (addr)
-			(unsigned int)dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
-			dst_dbl_linestride_vu,						// Two V lines (depending on the widht of the destination resolution)
-			STR_BUF+curr_dst_idx,						// Tag
-			0, 0 );
-
-	mfc_put(	scaled_u_plane[curr_src_idx],					// What from local store (addr)
-			(unsigned int)dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
-			dst_dbl_linestride_vu,						// Two U lines (depending on the widht of the destination resolution)
-			STR_BUF+curr_dst_idx,						// Tag
-			0, 0 );
-
-	// wait for completion
-	DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
-	//---------------------------------------------------------------------------------------------
-}
-
-
-/*
- * scale_srcw16_dstw32()
- *
- * processes an input image of width 16
- * scaling is done to a width 32
- * yuv2rgb conversion on a width of 32
- * result stored in RAM
- */
-void scale_srcw16_dstw32() {
-	// extract parameters
-	unsigned char* dst_addr = (unsigned char *)parms.dstBuffer;
-
-	unsigned int src_width = parms.src_pixel_width;
-	unsigned int src_height = parms.src_pixel_height;
-	unsigned int dst_width = parms.dst_pixel_width;
-	unsigned int dst_height = parms.dst_pixel_height;
-
-	// YVU
-	unsigned int src_linestride_y = src_width;
-	unsigned int src_dbl_linestride_y = src_width<<1;
-	unsigned int src_linestride_vu = src_width>>1;
-	unsigned int src_dbl_linestride_vu = src_width;
-	// scaled YVU
-	unsigned int scaled_src_linestride_y = dst_width;
-
-	// ram addresses
-	unsigned char* src_addr_y = parms.y_plane;
-	unsigned char* src_addr_v = parms.v_plane;
-	unsigned char* src_addr_u = parms.u_plane;
-
-	unsigned int dst_picture_size = dst_width*dst_height;
-
-	// Sizes for destination
-	unsigned int dst_dbl_linestride_y = dst_width<<1;
-	unsigned int dst_dbl_linestride_vu = dst_width>>1;
-
-	// Perform address calculation for Y, V and U in main memory with dst_addr as base
-	unsigned char* dst_addr_main_memory_y = dst_addr;
-	unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size;
-	unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2);
-
-
-	// for handling misalignment, addresses are precalculated
-	unsigned char* precalc_src_addr_v = src_addr_v;
-	unsigned char* precalc_src_addr_u = src_addr_u;
-
-	// calculate scale factors
-	vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width );
-	float y_scale = (float)src_height/(float)dst_height;
-
-	// double buffered processing
-	// buffer switching
-	unsigned int curr_src_idx = 0;
-	unsigned int curr_dst_idx = 0;
-	unsigned int next_src_idx, next_dst_idx;
-
-	// 2 lines y as output, upper and lowerline
-	unsigned int curr_interpl_y_upper = 0;
-	unsigned int next_interpl_y_upper;
-	unsigned int curr_interpl_y_lower, next_interpl_y_lower;
-	// only 1 line v/u output, both planes have the same dimension
-	unsigned int curr_interpl_vu = 0;
-	unsigned int next_interpl_vu;
-
-	// weights, calculated in every loop iteration
-	vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f };
-	vector float vf_next_NSweight_y_upper;
-	vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower;
-	vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f };
-	vector float vf_next_NSweight_vu;
-
-	// line indices for the src picture
-	float curr_src_y_upper = 0.0f, next_src_y_upper;
-	float curr_src_y_lower, next_src_y_lower;
-	float curr_src_vu = 0.0f, next_src_vu;
-
-	// line indices for the dst picture
-	unsigned int dst_y=0, dst_vu=0;
-
-	// offset for the v and u plane to handle misalignement
-	unsigned int curr_lsoff_v = 0, next_lsoff_v;
-	unsigned int curr_lsoff_u = 0, next_lsoff_u;
-
-	// calculate lower line idices
-	curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale;
-	curr_interpl_y_lower = (unsigned int)curr_src_y_lower;
-	// lower line weight
-	vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower );
-
-
-	// start partially double buffered processing
-	// get initial data, 2 sets of y, 1 set v, 1 set u
-	mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 );
-	mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y,
-			(unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y),
-			src_dbl_linestride_y,
-			RETR_BUF,
-			0, 0 );
-	mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
-	mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
-
-	// iteration loop
-	// within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved
-	// the scaled output is 2 lines y, 1 line v, 1 line u
-	// the yuv2rgb-converted output is stored to RAM
-	for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) {
-		dst_y = dst_vu<<1;
-
-		// calculate next indices
-		next_src_vu = ((float)dst_vu+1)*y_scale;
-		next_src_y_upper = ((float)dst_y+2)*y_scale;
-		next_src_y_lower = ((float)dst_y+3)*y_scale;
-
-		next_interpl_vu = (unsigned int) next_src_vu;
-		next_interpl_y_upper = (unsigned int) next_src_y_upper;
-		next_interpl_y_lower = (unsigned int) next_src_y_lower;
-
-		// calculate weight NORTH-SOUTH
-		vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu );
-		vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper );
-		vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower );
-
-		// get next lines
-		next_src_idx = curr_src_idx^1;
-		next_dst_idx = curr_dst_idx^1;
-
-		// 4 lines y
-		mfc_get( y_plane[next_src_idx],
-				(unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y),
-				src_dbl_linestride_y,
-				RETR_BUF+next_src_idx,
-				0, 0 );
-		mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y,
-				(unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y),
-				src_dbl_linestride_y,
-				RETR_BUF+next_src_idx,
-				0, 0 );
-
-		// 2 lines v
-		precalc_src_addr_v = src_addr_v+(next_interpl_vu*src_linestride_vu);
-		next_lsoff_v = ((unsigned int)precalc_src_addr_v)&0x0F;
-		mfc_get( v_plane[next_src_idx],
-				((unsigned int) precalc_src_addr_v)&0xFFFFFFF0,
-				src_dbl_linestride_vu+(next_lsoff_v<<1),
-				RETR_BUF+next_src_idx,
-				0, 0 );
-		// 2 lines u
-		precalc_src_addr_u = src_addr_u+(next_interpl_vu*src_linestride_vu);
-		next_lsoff_u = ((unsigned int)precalc_src_addr_u)&0x0F;
-		mfc_get( u_plane[next_src_idx],
-				((unsigned int) precalc_src_addr_u)&0xFFFFFFF0,
-				src_dbl_linestride_vu+(next_lsoff_v<<1),
-				RETR_BUF+next_src_idx,
-				0, 0 );
-
-		DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
-
-		// scaling
-		// work line y_upper
-		bilinear_scale_line_w16( y_plane[curr_src_idx],
-				scaled_y_plane[curr_src_idx],
-				dst_width,
-				vf_x_scale,
-				vf_curr_NSweight_y_upper,
-				src_linestride_y );
-		// work line y_lower
-		bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
-				scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
-				dst_width,
-				vf_x_scale,
-				vf_curr_NSweight_y_lower,
-				src_linestride_y );
-		// work line v
-		bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v,
-				scaled_v_plane[curr_src_idx],
-				dst_width>>1,
-				vf_x_scale,
-				vf_curr_NSweight_vu,
-				src_linestride_vu );
-		// work line u
-		bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u,
-				scaled_u_plane[curr_src_idx],
-				dst_width>>1,
-				vf_x_scale,
-				vf_curr_NSweight_vu,
-				src_linestride_vu );
-
-		//---------------------------------------------------------------------------------------------
-		DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
-
-		// Perform three DMA transfers to 3 different locations in the main memory!
-		// dst_width:	Pixel width of destination image
-		// dst_addr:	Destination address in main memory
-		// dst_vu:	Counter which is incremented one by one
-		// dst_y:	Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
-
-		mfc_put(	scaled_y_plane[curr_src_idx],							// What from local store (addr)
-				(unsigned int)  dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y),	// Destination in main memory (addr)
-				dst_dbl_linestride_y,								// Two Y lines (depending on the widht of the destination resolution)
-				STR_BUF+curr_dst_idx,								// Tag
-				0, 0 );
-
-		mfc_put(	scaled_v_plane[curr_src_idx],							// What from local store (addr)
-				(unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
-				dst_dbl_linestride_vu,								// Two V lines (depending on the widht of the destination resolution)
-				STR_BUF+curr_dst_idx,								// Tag
-				0, 0 );
-
-		mfc_put(	scaled_u_plane[curr_src_idx],							// What from local store (addr)
-				(unsigned int)  dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
-				dst_dbl_linestride_vu,								// Two U lines (depending on the widht of the destination resolution)
-				STR_BUF+curr_dst_idx,								// Tag
-				0, 0 );
-		//---------------------------------------------------------------------------------------------
-
-
-		// update for next cycle
-		curr_src_idx = next_src_idx;
-		curr_dst_idx = next_dst_idx;
-
-		curr_interpl_y_upper = next_interpl_y_upper;
-		curr_interpl_y_lower = next_interpl_y_lower;
-		curr_interpl_vu = next_interpl_vu;
-
-		vf_curr_NSweight_y_upper = vf_curr_NSweight_y_upper;
-		vf_curr_NSweight_y_lower = vf_curr_NSweight_y_lower;
-		vf_curr_NSweight_vu = vf_next_NSweight_vu;
-
-		curr_src_y_upper = next_src_y_upper;
-		curr_src_y_lower = next_src_y_lower;
-		curr_src_vu = next_src_vu;
-
-		curr_lsoff_v = next_lsoff_v;
-		curr_lsoff_u = next_lsoff_u;
-	}
-
-
-
-	DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
-
-	// scaling
-	// work line y_upper
-	bilinear_scale_line_w16( y_plane[curr_src_idx],
-			scaled_y_plane[curr_src_idx],
-			dst_width,
-			vf_x_scale,
-			vf_curr_NSweight_y_upper,
-			src_linestride_y );
-	// work line y_lower
-	bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
-			scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
-			dst_width,
-			vf_x_scale,
-			vf_curr_NSweight_y_lower,
-			src_linestride_y );
-	// work line v
-	bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v,
-			scaled_v_plane[curr_src_idx],
-			dst_width>>1,
-			vf_x_scale,
-			vf_curr_NSweight_vu,
-			src_linestride_vu );
-	// work line u
-	bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u,
-			scaled_u_plane[curr_src_idx],
-			dst_width>>1,
-			vf_x_scale,
-			vf_curr_NSweight_vu,
-			src_linestride_vu );
-
-	//---------------------------------------------------------------------------------------------
-	DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
-
-	// Perform three DMA transfers to 3 different locations in the main memory!
-	// dst_width:	Pixel width of destination image
-	// dst_addr:	Destination address in main memory
-	// dst_vu:	Counter which is incremented one by one
-	// dst_y:	Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
-
-	mfc_put(	scaled_y_plane[curr_src_idx],							// What from local store (addr)
-			(unsigned int)  dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y),	// Destination in main memory (addr)
-			dst_dbl_linestride_y,								// Two Y lines (depending on the widht of the destination resolution)
-			STR_BUF+curr_dst_idx,								// Tag
-			0, 0 );
-
-	mfc_put(	scaled_v_plane[curr_src_idx],							// What from local store (addr)
-			(unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
-			dst_dbl_linestride_vu,								// Two V lines (depending on the widht of the destination resolution)
-			STR_BUF+curr_dst_idx,								// Tag
-			0, 0 );
-
-	mfc_put(	scaled_u_plane[curr_src_idx],							// What from local store (addr)
-			(unsigned int)  dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
-			dst_dbl_linestride_vu,								// Two U lines (depending on the widht of the destination resolution)
-			STR_BUF+curr_dst_idx,								// Tag
-			0, 0 );
-
-	// wait for completion
-	DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
-	//---------------------------------------------------------------------------------------------
-}
-
-
-/*
- * scale_srcw32_dstw16()
- *
- * processes an input image of width 32
- * scaling is done to a width 16
- * yuv2rgb conversion on a width of 16
- * result stored in RAM
- */
-void scale_srcw32_dstw16() {
-	// extract parameters
-	unsigned char* dst_addr = (unsigned char *)parms.dstBuffer;
-
-	unsigned int src_width = parms.src_pixel_width;
-	unsigned int src_height = parms.src_pixel_height;
-	unsigned int dst_width = parms.dst_pixel_width;
-	unsigned int dst_height = parms.dst_pixel_height;
-
-	// YVU
-	unsigned int src_linestride_y = src_width;
-	unsigned int src_dbl_linestride_y = src_width<<1;
-	unsigned int src_linestride_vu = src_width>>1;
-	unsigned int src_dbl_linestride_vu = src_width;
-	// scaled YVU
-	unsigned int scaled_src_linestride_y = dst_width;
-
-	// ram addresses
-	unsigned char* src_addr_y = parms.y_plane;
-	unsigned char* src_addr_v = parms.v_plane;
-	unsigned char* src_addr_u = parms.u_plane;
-
-	unsigned int dst_picture_size = dst_width*dst_height;
-
-	// Sizes for destination
-	unsigned int dst_dbl_linestride_y = dst_width<<1;
-	unsigned int dst_dbl_linestride_vu = dst_width>>1;
-
-	// Perform address calculation for Y, V and U in main memory with dst_addr as base
-	unsigned char* dst_addr_main_memory_y = dst_addr;
-	unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size;
-	unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2);
-
-	// calculate scale factors
-	vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width );
-	float y_scale = (float)src_height/(float)dst_height;
-
-	// double buffered processing
-	// buffer switching
-	unsigned int curr_src_idx = 0;
-	unsigned int curr_dst_idx = 0;
-	unsigned int next_src_idx, next_dst_idx;
-
-	// 2 lines y as output, upper and lowerline
-	unsigned int curr_interpl_y_upper = 0;
-	unsigned int next_interpl_y_upper;
-	unsigned int curr_interpl_y_lower, next_interpl_y_lower;
-	// only 1 line v/u output, both planes have the same dimension
-	unsigned int curr_interpl_vu = 0;
-	unsigned int next_interpl_vu;
-
-	// weights, calculated in every loop iteration
-	vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f };
-	vector float vf_next_NSweight_y_upper;
-	vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower;
-	vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f };
-	vector float vf_next_NSweight_vu;
-
-	// line indices for the src picture
-	float curr_src_y_upper = 0.0f, next_src_y_upper;
-	float curr_src_y_lower, next_src_y_lower;
-	float curr_src_vu = 0.0f, next_src_vu;
-
-	// line indices for the dst picture
-	unsigned int dst_y=0, dst_vu=0;
-
-	// calculate lower line idices
-	curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale;
-	curr_interpl_y_lower = (unsigned int)curr_src_y_lower;
-	// lower line weight
-	vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower );
-
-
-	// start partially double buffered processing
-	// get initial data, 2 sets of y, 1 set v, 1 set u
-	mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 );
-	mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y,
-			(unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y),
-			src_dbl_linestride_y,
-			RETR_BUF,
-			0, 0 );
-	mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
-	mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
-
-	// iteration loop
-	// within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved
-	// the scaled output is 2 lines y, 1 line v, 1 line u
-	// the yuv2rgb-converted output is stored to RAM
-	for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) {
-		dst_y = dst_vu<<1;
-
-		// calculate next indices
-		next_src_vu = ((float)dst_vu+1)*y_scale;
-		next_src_y_upper = ((float)dst_y+2)*y_scale;
-		next_src_y_lower = ((float)dst_y+3)*y_scale;
-
-		next_interpl_vu = (unsigned int) next_src_vu;
-		next_interpl_y_upper = (unsigned int) next_src_y_upper;
-		next_interpl_y_lower = (unsigned int) next_src_y_lower;
-
-		// calculate weight NORTH-SOUTH
-		vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu );
-		vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper );
-		vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower );
-
-		// get next lines
-		next_src_idx = curr_src_idx^1;
-		next_dst_idx = curr_dst_idx^1;
-
-		// 4 lines y
-		mfc_get( y_plane[next_src_idx],
-				(unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y),
-				src_dbl_linestride_y,
-				RETR_BUF+next_src_idx,
-				0, 0 );
-		mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y,
-				(unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y),
-				src_dbl_linestride_y,
-				RETR_BUF+next_src_idx,
-				0, 0 );
-
-		// 2 lines v
-		mfc_get( v_plane[next_src_idx],
-				(unsigned int) src_addr_v+(next_interpl_vu*src_linestride_vu),
-				src_dbl_linestride_vu,
-				RETR_BUF+next_src_idx,
-				0, 0 );
-		// 2 lines u
-		mfc_get( u_plane[next_src_idx],
-				(unsigned int) src_addr_u+(next_interpl_vu*src_linestride_vu),
-				src_dbl_linestride_vu,
-				RETR_BUF+next_src_idx,
-				0, 0 );
-
-		DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
-
-		// scaling
-		// work line y_upper
-		bilinear_scale_line_w16( y_plane[curr_src_idx],
-				scaled_y_plane[curr_src_idx],
-				dst_width,
-				vf_x_scale,
-				vf_curr_NSweight_y_upper,
-				src_linestride_y );
-		// work line y_lower
-		bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
-				scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
-				dst_width,
-				vf_x_scale,
-				vf_curr_NSweight_y_lower,
-				src_linestride_y );
-		// work line v
-		bilinear_scale_line_w16( v_plane[curr_src_idx],
-				scaled_v_plane[curr_src_idx],
-				dst_width>>1,
-				vf_x_scale,
-				vf_curr_NSweight_vu,
-				src_linestride_vu );
-		// work line u
-		bilinear_scale_line_w16( u_plane[curr_src_idx],
-				scaled_u_plane[curr_src_idx],
-				dst_width>>1,
-				vf_x_scale,
-				vf_curr_NSweight_vu,
-				src_linestride_vu );
-
-		//---------------------------------------------------------------------------------------------
-		DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
-
-		// Perform three DMA transfers to 3 different locations in the main memory!
-		// dst_width:	Pixel width of destination image
-		// dst_addr:	Destination address in main memory
-		// dst_vu:	Counter which is incremented one by one
-		// dst_y:	Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
-
-		mfc_put(	scaled_y_plane[curr_src_idx],							// What from local store (addr)
-				(unsigned int)  dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y),	// Destination in main memory (addr)
-				dst_dbl_linestride_y,								// Two Y lines (depending on the widht of the destination resolution)
-				STR_BUF+curr_dst_idx,								// Tag
-				0, 0 );
-
-		mfc_put(	scaled_v_plane[curr_src_idx],							// What from local store (addr)
-				(unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
-				dst_dbl_linestride_vu,								// Two V lines (depending on the widht of the destination resolution)
-				STR_BUF+curr_dst_idx,								// Tag
-				0, 0 );
-
-		mfc_put(	scaled_u_plane[curr_src_idx],							// What from local store (addr)
-				(unsigned int)  dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
-				dst_dbl_linestride_vu,								// Two U lines (depending on the widht of the destination resolution)
-				STR_BUF+curr_dst_idx,								// Tag
-				0, 0 );
-		//---------------------------------------------------------------------------------------------
-
-
-		// update for next cycle
-		curr_src_idx = next_src_idx;
-		curr_dst_idx = next_dst_idx;
-
-		curr_interpl_y_upper = next_interpl_y_upper;
-		curr_interpl_y_lower = next_interpl_y_lower;
-		curr_interpl_vu = next_interpl_vu;
-
-		vf_curr_NSweight_y_upper = vf_curr_NSweight_y_upper;
-		vf_curr_NSweight_y_lower = vf_curr_NSweight_y_lower;
-		vf_curr_NSweight_vu = vf_next_NSweight_vu;
-
-		curr_src_y_upper = next_src_y_upper;
-		curr_src_y_lower = next_src_y_lower;
-		curr_src_vu = next_src_vu;
-	}
-
-
-
-	DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
-
-	// scaling
-	// work line y_upper
-	bilinear_scale_line_w16( y_plane[curr_src_idx],
-			scaled_y_plane[curr_src_idx],
-			dst_width,
-			vf_x_scale,
-			vf_curr_NSweight_y_upper,
-			src_linestride_y );
-	// work line y_lower
-	bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
-			scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
-			dst_width,
-			vf_x_scale,
-			vf_curr_NSweight_y_lower,
-			src_linestride_y );
-	// work line v
-	bilinear_scale_line_w16( v_plane[curr_src_idx],
-			scaled_v_plane[curr_src_idx],
-			dst_width>>1,
-			vf_x_scale,
-			vf_curr_NSweight_vu,
-			src_linestride_vu );
-	// work line u
-	bilinear_scale_line_w16( u_plane[curr_src_idx],
-			scaled_u_plane[curr_src_idx],
-			dst_width>>1,
-			vf_x_scale,
-			vf_curr_NSweight_vu,
-			src_linestride_vu );
-
-
-	//---------------------------------------------------------------------------------------------
-	DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
-
-	// Perform three DMA transfers to 3 different locations in the main memory!
-	// dst_width:	Pixel width of destination image
-	// dst_addr:	Destination address in main memory
-	// dst_vu:	Counter which is incremented one by one
-	// dst_y:	Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
-
-	mfc_put(	scaled_y_plane[curr_src_idx],							// What from local store (addr)
-			(unsigned int)  dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y),	// Destination in main memory (addr)
-			dst_dbl_linestride_y,								// Two Y lines (depending on the widht of the destination resolution)
-			STR_BUF+curr_dst_idx,								// Tag
-			0, 0 );
-
-	mfc_put(	scaled_v_plane[curr_src_idx],							// What from local store (addr)
-			(unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
-			dst_dbl_linestride_vu,								// Two V lines (depending on the widht of the destination resolution)
-			STR_BUF+curr_dst_idx,								// Tag
-			0, 0 );
-
-	mfc_put(	scaled_u_plane[curr_src_idx],							// What from local store (addr)
-			(unsigned int)  dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
-			dst_dbl_linestride_vu,								// Two U lines (depending on the widht of the destination resolution)
-			STR_BUF+curr_dst_idx,								// Tag
-			0, 0 );
-
-	// wait for completion
-	DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
-	//---------------------------------------------------------------------------------------------
-}
-
-
-/**
- * scale_srcw32_dstw32()
- *
- * processes an input image of width 32
- * scaling is done to a width 32
- * yuv2rgb conversion on a width of 32
- * result stored in RAM
- */
-void scale_srcw32_dstw32() {
-	// extract parameters
-	unsigned char* dst_addr = (unsigned char *)parms.dstBuffer;
-
-	unsigned int src_width = parms.src_pixel_width;
-	unsigned int src_height = parms.src_pixel_height;
-	unsigned int dst_width = parms.dst_pixel_width;
-	unsigned int dst_height = parms.dst_pixel_height;
-
-	// YVU
-	unsigned int src_linestride_y = src_width;
-	unsigned int src_dbl_linestride_y = src_width<<1;
-	unsigned int src_linestride_vu = src_width>>1;
-	unsigned int src_dbl_linestride_vu = src_width;
-
-	// scaled YVU
-	unsigned int scaled_src_linestride_y = dst_width;
-
-	// ram addresses
-	unsigned char* src_addr_y = parms.y_plane;
-	unsigned char* src_addr_v = parms.v_plane;
-	unsigned char* src_addr_u = parms.u_plane;
-
-	unsigned int dst_picture_size = dst_width*dst_height;
-
-	// Sizes for destination
-	unsigned int dst_dbl_linestride_y = dst_width<<1;
-	unsigned int dst_dbl_linestride_vu = dst_width>>1;
-
-	// Perform address calculation for Y, V and U in main memory with dst_addr as base
-	unsigned char* dst_addr_main_memory_y = dst_addr;
-	unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size;
-	unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2);
-
-	// calculate scale factors
-	vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width );
-	float y_scale = (float)src_height/(float)dst_height;
-
-	// double buffered processing
-	// buffer switching
-	unsigned int curr_src_idx = 0;
-	unsigned int curr_dst_idx = 0;
-	unsigned int next_src_idx, next_dst_idx;
-
-	// 2 lines y as output, upper and lowerline
-	unsigned int curr_interpl_y_upper = 0;
-	unsigned int next_interpl_y_upper;
-	unsigned int curr_interpl_y_lower, next_interpl_y_lower;
-	// only 1 line v/u output, both planes have the same dimension
-	unsigned int curr_interpl_vu = 0;
-	unsigned int next_interpl_vu;
-
-	// weights, calculated in every loop iteration
-	vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f };
-	vector float vf_next_NSweight_y_upper;
-	vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower;
-	vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f };
-	vector float vf_next_NSweight_vu;
-
-	// line indices for the src picture
-	float curr_src_y_upper = 0.0f, next_src_y_upper;
-	float curr_src_y_lower, next_src_y_lower;
-	float curr_src_vu = 0.0f, next_src_vu;
-
-	// line indices for the dst picture
-	unsigned int dst_y=0, dst_vu=0;
-
-	// calculate lower line idices
-	curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale;
-	curr_interpl_y_lower = (unsigned int)curr_src_y_lower;
-	// lower line weight
-	vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower );
-
-
-	// start partially double buffered processing
-	// get initial data, 2 sets of y, 1 set v, 1 set u
-	mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 );
-	mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y,
-			(unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y),
-			src_dbl_linestride_y,
-			RETR_BUF,
-			0, 0 );
-	mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
-	mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
-
-	// iteration loop
-	// within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved
-	// the scaled output is 2 lines y, 1 line v, 1 line u
-	// the yuv2rgb-converted output is stored to RAM
-	for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) {
-		dst_y = dst_vu<<1;
-
-		// calculate next indices
-		next_src_vu = ((float)dst_vu+1)*y_scale;
-		next_src_y_upper = ((float)dst_y+2)*y_scale;
-		next_src_y_lower = ((float)dst_y+3)*y_scale;
-
-		next_interpl_vu = (unsigned int) next_src_vu;
-		next_interpl_y_upper = (unsigned int) next_src_y_upper;
-		next_interpl_y_lower = (unsigned int) next_src_y_lower;
-
-		// calculate weight NORTH-SOUTH
-		vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu );
-		vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper );
-		vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower );
-
-		// get next lines
-		next_src_idx = curr_src_idx^1;
-		next_dst_idx = curr_dst_idx^1;
-
-		// 4 lines y
-		mfc_get( y_plane[next_src_idx],
-				(unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y),
-				src_dbl_linestride_y,
-				RETR_BUF+next_src_idx,
-				0, 0 );
-		mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y,
-				(unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y),
-				src_dbl_linestride_y,
-				RETR_BUF+next_src_idx,
-				0, 0 );
-
-		// 2 lines v
-		mfc_get( v_plane[next_src_idx],
-				(unsigned int) src_addr_v+(next_interpl_vu*src_linestride_vu),
-				src_dbl_linestride_vu,
-				RETR_BUF+next_src_idx,
-				0, 0 );
-		// 2 lines u
-		mfc_get( u_plane[next_src_idx],
-				(unsigned int) src_addr_u+(next_interpl_vu*src_linestride_vu),
-				src_dbl_linestride_vu,
-				RETR_BUF+next_src_idx,
-				0, 0 );
-
-		DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
-
-		// scaling
-		// work line y_upper
-		bilinear_scale_line_w16( y_plane[curr_src_idx],
-				scaled_y_plane[curr_src_idx],
-				dst_width,
-				vf_x_scale,
-				vf_curr_NSweight_y_upper,
-				src_linestride_y );
-		// work line y_lower
-		bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
-				scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
-				dst_width,
-				vf_x_scale,
-				vf_curr_NSweight_y_lower,
-				src_linestride_y );
-		// work line v
-		bilinear_scale_line_w16( v_plane[curr_src_idx],
-				scaled_v_plane[curr_src_idx],
-				dst_width>>1,
-				vf_x_scale,
-				vf_curr_NSweight_vu,
-				src_linestride_vu );
-		// work line u
-		bilinear_scale_line_w16( u_plane[curr_src_idx],
-				scaled_u_plane[curr_src_idx],
-				dst_width>>1,
-				vf_x_scale,
-				vf_curr_NSweight_vu,
-				src_linestride_vu );
-
-
-
-		// Store the result back to main memory into a destination buffer in YUV format
-		//---------------------------------------------------------------------------------------------
-		DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
-
-		// Perform three DMA transfers to 3 different locations in the main memory!
-		// dst_width:	Pixel width of destination image
-		// dst_addr:	Destination address in main memory
-		// dst_vu:	Counter which is incremented one by one
-		// dst_y:	Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
-
-		mfc_put(	scaled_y_plane[curr_src_idx],							// What from local store (addr)
-				(unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y),	// Destination in main memory (addr)
-				dst_dbl_linestride_y,								// Two Y lines (depending on the widht of the destination resolution)
-				STR_BUF+curr_dst_idx,								// Tag
-				0, 0 );
-
-		mfc_put(	scaled_v_plane[curr_src_idx],							// What from local store (addr)
-				(unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
-				dst_dbl_linestride_vu,								// Two V lines (depending on the widht of the destination resolution)
-				STR_BUF+curr_dst_idx,								// Tag
-				0, 0 );
-
-		mfc_put(	scaled_u_plane[curr_src_idx],							// What from local store (addr)
-				(unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
-				dst_dbl_linestride_vu,								// Two U lines (depending on the widht of the destination resolution)
-				STR_BUF+curr_dst_idx,								// Tag
-				0, 0 );
-		//---------------------------------------------------------------------------------------------
-
-
-		// update for next cycle
-		curr_src_idx = next_src_idx;
-		curr_dst_idx = next_dst_idx;
-
-		curr_interpl_y_upper = next_interpl_y_upper;
-		curr_interpl_y_lower = next_interpl_y_lower;
-		curr_interpl_vu = next_interpl_vu;
-
-		vf_curr_NSweight_y_upper = vf_curr_NSweight_y_upper;
-		vf_curr_NSweight_y_lower = vf_curr_NSweight_y_lower;
-		vf_curr_NSweight_vu = vf_next_NSweight_vu;
-
-		curr_src_y_upper = next_src_y_upper;
-		curr_src_y_lower = next_src_y_lower;
-		curr_src_vu = next_src_vu;
-	}
-
-
-
-	DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
-
-	// scaling
-	// work line y_upper
-	bilinear_scale_line_w16( y_plane[curr_src_idx],
-			scaled_y_plane[curr_src_idx],
-			dst_width,
-			vf_x_scale,
-			vf_curr_NSweight_y_upper,
-			src_linestride_y );
-	// work line y_lower
-	bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
-			scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
-			dst_width,
-			vf_x_scale,
-			vf_curr_NSweight_y_lower,
-			src_linestride_y );
-	// work line v
-	bilinear_scale_line_w16( v_plane[curr_src_idx],
-			scaled_v_plane[curr_src_idx],
-			dst_width>>1,
-			vf_x_scale,
-			vf_curr_NSweight_vu,
-			src_linestride_vu );
-	// work line u
-	bilinear_scale_line_w16( u_plane[curr_src_idx],
-			scaled_u_plane[curr_src_idx],
-			dst_width>>1,
-			vf_x_scale,
-			vf_curr_NSweight_vu,
-			src_linestride_vu );
-
-
-	// Store the result back to main memory into a destination buffer in YUV format
-	//---------------------------------------------------------------------------------------------
-	DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
-
-	// Perform three DMA transfers to 3 different locations in the main memory!
-	// dst_width:	Pixel width of destination image
-	// dst_addr:	Destination address in main memory
-	// dst_vu:	Counter which is incremented one by one
-	// dst_y:	Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
-
-	mfc_put(	scaled_y_plane[curr_src_idx],							// What from local store (addr)
-			(unsigned int)  dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y),	// Destination in main memory (addr)
-			dst_dbl_linestride_y,								// Two Y lines (depending on the widht of the destination resolution)
-			STR_BUF+curr_dst_idx,								// Tag
-			0, 0 );
-
-	mfc_put(	scaled_v_plane[curr_src_idx],							// What from local store (addr)
-			(unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
-			dst_dbl_linestride_vu,								// Two V lines (depending on the widht of the destination resolution)
-			STR_BUF+curr_dst_idx,								// Tag
-			0, 0 );
-
-	mfc_put(	scaled_u_plane[curr_src_idx],							// What from local store (addr)
-			(unsigned int)  dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
-			dst_dbl_linestride_vu,								// Two U lines (depending on the widht of the destination resolution)
-			STR_BUF+curr_dst_idx,								// Tag
-			0, 0 );
-
-	// wait for completion
-	DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
-	//---------------------------------------------------------------------------------------------
-}
-
-
-/*
- * bilinear_scale_line_w8()
- *
- * processes a line of yuv-input, width has to be a multiple of 8
- * scaled yuv-output is written to local store buffer
- *
- * @param src buffer for 2 lines input
- * @param dst_ buffer for 1 line output
- * @param dst_width the width of the destination line
- * @param vf_x_scale a float vector, at each entry is the x_scale-factor
- * @param vf_NSweight a float vector, at each position is the weight NORTH/SOUTH for the current line
- * @param src_linestride the stride of the srcline
- */
-void bilinear_scale_line_w8( unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride ) {
-
-	unsigned char* dst = dst_;
-
-	unsigned int dst_x;
-	for( dst_x=0; dst_x<dst_width; dst_x+=8) {
-		// address calculation for loading the 4 surrounding pixel of each calculated
-		// destination pixel
-		vector unsigned int vui_dst_x_tmp = spu_splats( dst_x );
-		// lower range->first 4 pixel
-		// upper range->next 4 pixel
-		vector unsigned int vui_inc_dst_x_lower_range = { 0, 1, 2, 3 };
-		vector unsigned int vui_inc_dst_x_upper_range = { 4, 5, 6, 7 };
-		vector unsigned int vui_dst_x_lower_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_lower_range );
-		vector unsigned int vui_dst_x_upper_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_upper_range );
-
-		// calculate weight EAST-WEST
-		vector float vf_dst_x_lower_range = spu_convtf( vui_dst_x_lower_range, 0 );
-		vector float vf_dst_x_upper_range = spu_convtf( vui_dst_x_upper_range, 0 );
-		vector float vf_src_x_lower_range = spu_mul( vf_dst_x_lower_range, vf_x_scale );
-		vector float vf_src_x_upper_range = spu_mul( vf_dst_x_upper_range, vf_x_scale );
-		vector unsigned int vui_interpl_x_lower_range = spu_convtu( vf_src_x_lower_range, 0 );
-		vector unsigned int vui_interpl_x_upper_range = spu_convtu( vf_src_x_upper_range, 0 );
-		vector float vf_interpl_x_lower_range = spu_convtf( vui_interpl_x_lower_range, 0 );
-		vector float vf_interpl_x_upper_range = spu_convtf( vui_interpl_x_upper_range, 0 );
-		vector float vf_EWweight_lower_range = spu_sub( vf_src_x_lower_range, vf_interpl_x_lower_range );
-		vector float vf_EWweight_upper_range = spu_sub( vf_src_x_upper_range, vf_interpl_x_upper_range );
-
-		// calculate address offset
-		//
-		// pixel NORTH WEST
-		vector unsigned int vui_off_pixelNW_lower_range = vui_interpl_x_lower_range;
-		vector unsigned int vui_off_pixelNW_upper_range = vui_interpl_x_upper_range;
-
-		// pixel NORTH EAST-->(offpixelNW+1)
-		vector unsigned int vui_add_1 = { 1, 1, 1, 1 };
-		vector unsigned int vui_off_pixelNE_lower_range = spu_add( vui_off_pixelNW_lower_range, vui_add_1 );
-		vector unsigned int vui_off_pixelNE_upper_range = spu_add( vui_off_pixelNW_upper_range, vui_add_1 );
-
-		// SOUTH-WEST-->(offpixelNW+src_linestride)
-		vector unsigned int vui_srclinestride = spu_splats( src_linestride );
-		vector unsigned int vui_off_pixelSW_lower_range = spu_add( vui_srclinestride, vui_off_pixelNW_lower_range );
-		vector unsigned int vui_off_pixelSW_upper_range = spu_add( vui_srclinestride, vui_off_pixelNW_upper_range );
-
-		// SOUTH-EAST-->(offpixelNW+src_linestride+1)
-		vector unsigned int vui_off_pixelSE_lower_range = spu_add( vui_srclinestride, vui_off_pixelNE_lower_range );
-		vector unsigned int vui_off_pixelSE_upper_range = spu_add( vui_srclinestride, vui_off_pixelNE_upper_range );
-
-		// calculate each address
-		vector unsigned int vui_src_ls = spu_splats( (unsigned int) src );
-		vector unsigned int vui_addr_pixelNW_lower_range = spu_add( vui_src_ls, vui_off_pixelNW_lower_range );
-		vector unsigned int vui_addr_pixelNW_upper_range = spu_add( vui_src_ls, vui_off_pixelNW_upper_range );
-		vector unsigned int vui_addr_pixelNE_lower_range = spu_add( vui_src_ls, vui_off_pixelNE_lower_range );
-		vector unsigned int vui_addr_pixelNE_upper_range = spu_add( vui_src_ls, vui_off_pixelNE_upper_range );
-
-		vector unsigned int vui_addr_pixelSW_lower_range = spu_add( vui_src_ls, vui_off_pixelSW_lower_range );
-		vector unsigned int vui_addr_pixelSW_upper_range = spu_add( vui_src_ls, vui_off_pixelSW_upper_range );
-		vector unsigned int vui_addr_pixelSE_lower_range = spu_add( vui_src_ls, vui_off_pixelSE_lower_range );
-		vector unsigned int vui_addr_pixelSE_upper_range = spu_add( vui_src_ls, vui_off_pixelSE_upper_range );
-
-		// get each pixel
-		//
-		// scalar load, afterwards insertion into the right position
-		// NORTH WEST
-		vector unsigned char null_vector = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-		vector unsigned char vuc_pixel_NW_lower_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 0 )), null_vector, 3 );
-		vuc_pixel_NW_lower_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 1 )),
-				vuc_pixel_NW_lower_range, 7 );
-		vuc_pixel_NW_lower_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 2 )),
-				vuc_pixel_NW_lower_range, 11 );
-		vuc_pixel_NW_lower_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 3 )),
-				vuc_pixel_NW_lower_range, 15 );
-
-		vector unsigned char vuc_pixel_NW_upper_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 0 )), null_vector, 3 );
-		vuc_pixel_NW_upper_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 1 )),
-				vuc_pixel_NW_upper_range, 7 );
-		vuc_pixel_NW_upper_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 2 )),
-				vuc_pixel_NW_upper_range, 11 );
-		vuc_pixel_NW_upper_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 3 )),
-				vuc_pixel_NW_upper_range, 15 );
-
-		// NORTH EAST
-		vector unsigned char vuc_pixel_NE_lower_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 0 )), null_vector, 3 );
-		vuc_pixel_NE_lower_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 1 )),
-				vuc_pixel_NE_lower_range, 7 );
-		vuc_pixel_NE_lower_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 2 )),
-				vuc_pixel_NE_lower_range, 11 );
-		vuc_pixel_NE_lower_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 3 )),
-				vuc_pixel_NE_lower_range, 15 );
-
-		vector unsigned char vuc_pixel_NE_upper_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 0 )), null_vector, 3 );
-		vuc_pixel_NE_upper_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 1 )),
-				vuc_pixel_NE_upper_range, 7 );
-		vuc_pixel_NE_upper_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 2 )),
-				vuc_pixel_NE_upper_range, 11 );
-		vuc_pixel_NE_upper_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 3 )),
-				vuc_pixel_NE_upper_range, 15 );
-
-
-		// SOUTH WEST
-		vector unsigned char vuc_pixel_SW_lower_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 0 )), null_vector, 3 );
-		vuc_pixel_SW_lower_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 1 )),
-				vuc_pixel_SW_lower_range, 7 );
-		vuc_pixel_SW_lower_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 2 )),
-				vuc_pixel_SW_lower_range, 11 );
-		vuc_pixel_SW_lower_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 3 )),
-				vuc_pixel_SW_lower_range, 15 );
-
-		vector unsigned char vuc_pixel_SW_upper_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 0 )), null_vector, 3 );
-		vuc_pixel_SW_upper_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 1 )),
-				vuc_pixel_SW_upper_range, 7 );
-		vuc_pixel_SW_upper_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 2 )),
-				vuc_pixel_SW_upper_range, 11 );
-		vuc_pixel_SW_upper_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 3 )),
-				vuc_pixel_SW_upper_range, 15 );
-
-		// SOUTH EAST
-		vector unsigned char vuc_pixel_SE_lower_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 0 )), null_vector, 3 );
-		vuc_pixel_SE_lower_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 1 )),
-				vuc_pixel_SE_lower_range, 7 );
-		vuc_pixel_SE_lower_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 2 )),
-				vuc_pixel_SE_lower_range, 11 );
-		vuc_pixel_SE_lower_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 3 )),
-				vuc_pixel_SE_lower_range, 15 );
-
-		vector unsigned char vuc_pixel_SE_upper_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 0 )), null_vector, 3 );
-		vuc_pixel_SE_upper_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 1 )),
-				vuc_pixel_SE_upper_range, 7 );
-		vuc_pixel_SE_upper_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 2 )),
-				vuc_pixel_SE_upper_range, 11 );
-		vuc_pixel_SE_upper_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 3 )),
-				vuc_pixel_SE_upper_range, 15 );
-
-
-		// convert to float
-		vector float vf_pixel_NW_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_lower_range, 0 );
-		vector float vf_pixel_NW_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_upper_range, 0 );
-
-		vector float vf_pixel_SW_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_lower_range, 0 );
-		vector float vf_pixel_SW_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_upper_range, 0 );
-
-		vector float vf_pixel_NE_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_lower_range, 0 );
-		vector float vf_pixel_NE_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_upper_range, 0 );
-
-		vector float vf_pixel_SE_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_lower_range, 0 );
-		vector float vf_pixel_SE_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_upper_range, 0 );
-
-
-
-		// first linear interpolation: EWtop
-		// EWtop = NW + EWweight*(NE-NW)
-		//
-		// lower range
-		vector float vf_EWtop_lower_range_tmp = spu_sub( vf_pixel_NE_lower_range, vf_pixel_NW_lower_range );
-		vector float vf_EWtop_lower_range = spu_madd( vf_EWweight_lower_range,
-								vf_EWtop_lower_range_tmp,
-								vf_pixel_NW_lower_range );
-
-		// upper range
-		vector float vf_EWtop_upper_range_tmp = spu_sub( vf_pixel_NE_upper_range, vf_pixel_NW_upper_range );
-		vector float vf_EWtop_upper_range = spu_madd( vf_EWweight_upper_range,
-								vf_EWtop_upper_range_tmp,
-								vf_pixel_NW_upper_range );
-
-
-
-		// second linear interpolation: EWbottom
-		// EWbottom = SW + EWweight*(SE-SW)
-		//
-		// lower range
-		vector float vf_EWbottom_lower_range_tmp = spu_sub( vf_pixel_SE_lower_range, vf_pixel_SW_lower_range );
-		vector float vf_EWbottom_lower_range = spu_madd( vf_EWweight_lower_range,
-								vf_EWbottom_lower_range_tmp,
-								vf_pixel_SW_lower_range );
-
-		// upper range
-		vector float vf_EWbottom_upper_range_tmp = spu_sub( vf_pixel_SE_upper_range, vf_pixel_SW_upper_range );
-		vector float vf_EWbottom_upper_range = spu_madd( vf_EWweight_upper_range,
-								vf_EWbottom_upper_range_tmp,
-								vf_pixel_SW_upper_range );
-
-
-
-		// third linear interpolation: the bilinear interpolated value
-		// result = EWtop + NSweight*(EWbottom-EWtop);
-		//
-		// lower range
-		vector float vf_result_lower_range_tmp = spu_sub( vf_EWbottom_lower_range, vf_EWtop_lower_range );
-		vector float vf_result_lower_range = spu_madd( vf_NSweight,
-								vf_result_lower_range_tmp,
-								vf_EWtop_lower_range );
-
-		// upper range
-		vector float vf_result_upper_range_tmp = spu_sub( vf_EWbottom_upper_range, vf_EWtop_upper_range );
-		vector float vf_result_upper_range = spu_madd( vf_NSweight,
-								vf_result_upper_range_tmp,
-								vf_EWtop_upper_range );
-
-
-		// convert back: using saturated arithmetic
-		vector unsigned int vui_result_lower_range = vfloat_to_vuint( vf_result_lower_range );
-		vector unsigned int vui_result_upper_range = vfloat_to_vuint( vf_result_upper_range );
-
-		// merge results->lower,upper
-		vector unsigned char vuc_mask_merge_result = { 0x03, 0x07, 0x0B, 0x0F,
-							       0x13, 0x17, 0x1B, 0x1F,
-							       0x00, 0x00, 0x00, 0x00,
-							       0x00, 0x00, 0x00, 0x00 };
-
-		vector unsigned char vuc_result = spu_shuffle( (vector unsigned char) vui_result_lower_range,
-								(vector unsigned char) vui_result_upper_range,
-								vuc_mask_merge_result );
-
-		// partial storing
-		vector unsigned char vuc_mask_out = { 0x00, 0x00, 0x00, 0x00,
-						      0x00, 0x00, 0x00, 0x00,
-						      0xFF, 0xFF, 0xFF, 0xFF,
-						      0xFF, 0xFF, 0xFF, 0xFF };
-
-
-		// get currently stored data
-		vector unsigned char vuc_orig = *((vector unsigned char*)dst);
-
-		// clear currently stored data
-		vuc_orig = spu_and( vuc_orig,
-				spu_rlqwbyte( vuc_mask_out, ((unsigned int)dst)&0x0F) );
-
-		// rotate result according to storing address
-		vuc_result = spu_rlqwbyte( vuc_result, ((unsigned int)dst)&0x0F );
-
-		// store result
-		*((vector unsigned char*)dst) = spu_or( vuc_result,
-							vuc_orig );
-		dst += 8;
-	}
-}
-
-
-/*
- * bilinear_scale_line_w16()
- *
- * processes a line of yuv-input, width has to be a multiple of 16
- * scaled yuv-output is written to local store buffer
- *
- * @param src buffer for 2 lines input
- * @param dst_ buffer for 1 line output
- * @param dst_width the width of the destination line
- * @param vf_x_scale a float vector, at each entry is the x_scale-factor
- * @param vf_NSweight a float vector, at each position is the weight NORTH/SOUTH for the current line
- * @param src_linestride the stride of the srcline
- */
-void bilinear_scale_line_w16( unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride ) {
-
-	unsigned char* dst = dst_;
-
-	unsigned int dst_x;
-	for( dst_x=0; dst_x<dst_width; dst_x+=16) {
-		// address calculation for loading the 4 surrounding pixel of each calculated
-		// destination pixel
-		vector unsigned int vui_dst_x_tmp = spu_splats( dst_x );
-		// parallelised processing
-		// first range->pixel 1 2 3 4
-		// second range->pixel 5 6 7 8
-		// third range->pixel 9 10 11 12
-		// fourth range->pixel 13 14 15 16
-		vector unsigned int vui_inc_dst_x_first_range = { 0, 1, 2, 3 };
-		vector unsigned int vui_inc_dst_x_second_range = { 4, 5, 6, 7 };
-		vector unsigned int vui_inc_dst_x_third_range = { 8, 9, 10, 11 };
-		vector unsigned int vui_inc_dst_x_fourth_range = { 12, 13, 14, 15 };
-		vector unsigned int vui_dst_x_first_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_first_range );
-		vector unsigned int vui_dst_x_second_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_second_range );
-		vector unsigned int vui_dst_x_third_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_third_range );
-		vector unsigned int vui_dst_x_fourth_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_fourth_range );
-
-		// calculate weight EAST-WEST
-		vector float vf_dst_x_first_range = spu_convtf( vui_dst_x_first_range, 0 );
-		vector float vf_dst_x_second_range = spu_convtf( vui_dst_x_second_range, 0 );
-		vector float vf_dst_x_third_range = spu_convtf( vui_dst_x_third_range, 0 );
-		vector float vf_dst_x_fourth_range = spu_convtf( vui_dst_x_fourth_range, 0 );
-		vector float vf_src_x_first_range = spu_mul( vf_dst_x_first_range, vf_x_scale );
-		vector float vf_src_x_second_range = spu_mul( vf_dst_x_second_range, vf_x_scale );
-		vector float vf_src_x_third_range = spu_mul( vf_dst_x_third_range, vf_x_scale );
-		vector float vf_src_x_fourth_range = spu_mul( vf_dst_x_fourth_range, vf_x_scale );
-		vector unsigned int vui_interpl_x_first_range = spu_convtu( vf_src_x_first_range, 0 );
-		vector unsigned int vui_interpl_x_second_range = spu_convtu( vf_src_x_second_range, 0 );
-		vector unsigned int vui_interpl_x_third_range = spu_convtu( vf_src_x_third_range, 0 );
-		vector unsigned int vui_interpl_x_fourth_range = spu_convtu( vf_src_x_fourth_range, 0 );
-		vector float vf_interpl_x_first_range = spu_convtf( vui_interpl_x_first_range, 0 );
-		vector float vf_interpl_x_second_range = spu_convtf( vui_interpl_x_second_range, 0 );
-		vector float vf_interpl_x_third_range = spu_convtf( vui_interpl_x_third_range, 0 );
-		vector float vf_interpl_x_fourth_range = spu_convtf( vui_interpl_x_fourth_range, 0 );
-		vector float vf_EWweight_first_range = spu_sub( vf_src_x_first_range, vf_interpl_x_first_range );
-		vector float vf_EWweight_second_range = spu_sub( vf_src_x_second_range, vf_interpl_x_second_range );
-		vector float vf_EWweight_third_range = spu_sub( vf_src_x_third_range, vf_interpl_x_third_range );
-		vector float vf_EWweight_fourth_range = spu_sub( vf_src_x_fourth_range, vf_interpl_x_fourth_range );
-
-		// calculate address offset
-		//
-		// pixel NORTH WEST
-		vector unsigned int vui_off_pixelNW_first_range = vui_interpl_x_first_range;
-		vector unsigned int vui_off_pixelNW_second_range = vui_interpl_x_second_range;
-		vector unsigned int vui_off_pixelNW_third_range = vui_interpl_x_third_range;
-		vector unsigned int vui_off_pixelNW_fourth_range = vui_interpl_x_fourth_range;
-
-		// pixel NORTH EAST-->(offpixelNW+1)
-		vector unsigned int vui_add_1 = { 1, 1, 1, 1 };
-		vector unsigned int vui_off_pixelNE_first_range = spu_add( vui_off_pixelNW_first_range, vui_add_1 );
-		vector unsigned int vui_off_pixelNE_second_range = spu_add( vui_off_pixelNW_second_range, vui_add_1 );
-		vector unsigned int vui_off_pixelNE_third_range = spu_add( vui_off_pixelNW_third_range, vui_add_1 );
-		vector unsigned int vui_off_pixelNE_fourth_range = spu_add( vui_off_pixelNW_fourth_range, vui_add_1 );
-
-		// SOUTH-WEST-->(offpixelNW+src_linestride)
-		vector unsigned int vui_srclinestride = spu_splats( src_linestride );
-		vector unsigned int vui_off_pixelSW_first_range = spu_add( vui_srclinestride, vui_off_pixelNW_first_range );
-		vector unsigned int vui_off_pixelSW_second_range = spu_add( vui_srclinestride, vui_off_pixelNW_second_range );
-		vector unsigned int vui_off_pixelSW_third_range = spu_add( vui_srclinestride, vui_off_pixelNW_third_range );
-		vector unsigned int vui_off_pixelSW_fourth_range = spu_add( vui_srclinestride, vui_off_pixelNW_fourth_range );
-
-		// SOUTH-EAST-->(offpixelNW+src_linestride+1)
-		vector unsigned int vui_off_pixelSE_first_range = spu_add( vui_srclinestride, vui_off_pixelNE_first_range );
-		vector unsigned int vui_off_pixelSE_second_range = spu_add( vui_srclinestride, vui_off_pixelNE_second_range );
-		vector unsigned int vui_off_pixelSE_third_range = spu_add( vui_srclinestride, vui_off_pixelNE_third_range );
-		vector unsigned int vui_off_pixelSE_fourth_range = spu_add( vui_srclinestride, vui_off_pixelNE_fourth_range );
-
-		// calculate each address
-		vector unsigned int vui_src_ls = spu_splats( (unsigned int) src );
-		vector unsigned int vui_addr_pixelNW_first_range = spu_add( vui_src_ls, vui_off_pixelNW_first_range );
-		vector unsigned int vui_addr_pixelNW_second_range = spu_add( vui_src_ls, vui_off_pixelNW_second_range );
-		vector unsigned int vui_addr_pixelNW_third_range = spu_add( vui_src_ls, vui_off_pixelNW_third_range );
-		vector unsigned int vui_addr_pixelNW_fourth_range = spu_add( vui_src_ls, vui_off_pixelNW_fourth_range );
-
-		vector unsigned int vui_addr_pixelNE_first_range = spu_add( vui_src_ls, vui_off_pixelNE_first_range );
-		vector unsigned int vui_addr_pixelNE_second_range = spu_add( vui_src_ls, vui_off_pixelNE_second_range );
-		vector unsigned int vui_addr_pixelNE_third_range = spu_add( vui_src_ls, vui_off_pixelNE_third_range );
-		vector unsigned int vui_addr_pixelNE_fourth_range = spu_add( vui_src_ls, vui_off_pixelNE_fourth_range );
-
-		vector unsigned int vui_addr_pixelSW_first_range = spu_add( vui_src_ls, vui_off_pixelSW_first_range );
-		vector unsigned int vui_addr_pixelSW_second_range = spu_add( vui_src_ls, vui_off_pixelSW_second_range );
-		vector unsigned int vui_addr_pixelSW_third_range = spu_add( vui_src_ls, vui_off_pixelSW_third_range );
-		vector unsigned int vui_addr_pixelSW_fourth_range = spu_add( vui_src_ls, vui_off_pixelSW_fourth_range );
-
-		vector unsigned int vui_addr_pixelSE_first_range = spu_add( vui_src_ls, vui_off_pixelSE_first_range );
-		vector unsigned int vui_addr_pixelSE_second_range = spu_add( vui_src_ls, vui_off_pixelSE_second_range );
-		vector unsigned int vui_addr_pixelSE_third_range = spu_add( vui_src_ls, vui_off_pixelSE_third_range );
-		vector unsigned int vui_addr_pixelSE_fourth_range = spu_add( vui_src_ls, vui_off_pixelSE_fourth_range );
-
-
-		// get each pixel
-		//
-		// scalar load, afterwards insertion into the right position
-		// NORTH WEST
-		// first range
-		vector unsigned char null_vector = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-		vector unsigned char vuc_pixel_NW_first_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 0 )), null_vector, 3 );
-		vuc_pixel_NW_first_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 1 )),
-				vuc_pixel_NW_first_range, 7 );
-		vuc_pixel_NW_first_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 2 )),
-				vuc_pixel_NW_first_range, 11 );
-		vuc_pixel_NW_first_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 3 )),
-				vuc_pixel_NW_first_range, 15 );
-		// second range
-		vector unsigned char vuc_pixel_NW_second_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 0 )), null_vector, 3 );
-		vuc_pixel_NW_second_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 1 )),
-				vuc_pixel_NW_second_range, 7 );
-		vuc_pixel_NW_second_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 2 )),
-				vuc_pixel_NW_second_range, 11 );
-		vuc_pixel_NW_second_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 3 )),
-				vuc_pixel_NW_second_range, 15 );
-		// third range
-		vector unsigned char vuc_pixel_NW_third_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 0 )), null_vector, 3 );
-		vuc_pixel_NW_third_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 1 )),
-				vuc_pixel_NW_third_range, 7 );
-		vuc_pixel_NW_third_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 2 )),
-				vuc_pixel_NW_third_range, 11 );
-		vuc_pixel_NW_third_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 3 )),
-				vuc_pixel_NW_third_range, 15 );
-		// fourth range
-		vector unsigned char vuc_pixel_NW_fourth_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 0 )), null_vector, 3 );
-		vuc_pixel_NW_fourth_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 1 )),
-				vuc_pixel_NW_fourth_range, 7 );
-		vuc_pixel_NW_fourth_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 2 )),
-				vuc_pixel_NW_fourth_range, 11 );
-		vuc_pixel_NW_fourth_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 3 )),
-				vuc_pixel_NW_fourth_range, 15 );
-
-		// NORTH EAST
-		// first range
-		vector unsigned char vuc_pixel_NE_first_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 0 )), null_vector, 3 );
-		vuc_pixel_NE_first_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 1 )),
-				vuc_pixel_NE_first_range, 7 );
-		vuc_pixel_NE_first_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 2 )),
-				vuc_pixel_NE_first_range, 11 );
-		vuc_pixel_NE_first_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 3 )),
-				vuc_pixel_NE_first_range, 15 );
-		// second range
-		vector unsigned char vuc_pixel_NE_second_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 0 )), null_vector, 3 );
-		vuc_pixel_NE_second_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 1 )),
-				vuc_pixel_NE_second_range, 7 );
-		vuc_pixel_NE_second_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 2 )),
-				vuc_pixel_NE_second_range, 11 );
-		vuc_pixel_NE_second_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 3 )),
-				vuc_pixel_NE_second_range, 15 );
-		// third range
-		vector unsigned char vuc_pixel_NE_third_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 0 )), null_vector, 3 );
-		vuc_pixel_NE_third_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 1 )),
-				vuc_pixel_NE_third_range, 7 );
-		vuc_pixel_NE_third_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 2 )),
-				vuc_pixel_NE_third_range, 11 );
-		vuc_pixel_NE_third_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 3 )),
-				vuc_pixel_NE_third_range, 15 );
-		// fourth range
-		vector unsigned char vuc_pixel_NE_fourth_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 0 )), null_vector, 3 );
-		vuc_pixel_NE_fourth_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 1 )),
-				vuc_pixel_NE_fourth_range, 7 );
-		vuc_pixel_NE_fourth_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 2 )),
-				vuc_pixel_NE_fourth_range, 11 );
-		vuc_pixel_NE_fourth_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 3 )),
-				vuc_pixel_NE_fourth_range, 15 );
-
-		// SOUTH WEST
-		// first range
-		vector unsigned char vuc_pixel_SW_first_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 0 )), null_vector, 3 );
-		vuc_pixel_SW_first_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 1 )),
-				vuc_pixel_SW_first_range, 7 );
-		vuc_pixel_SW_first_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 2 )),
-				vuc_pixel_SW_first_range, 11 );
-		vuc_pixel_SW_first_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 3 )),
-				vuc_pixel_SW_first_range, 15 );
-		// second range
-		vector unsigned char vuc_pixel_SW_second_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 0 )), null_vector, 3 );
-		vuc_pixel_SW_second_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 1 )),
-				vuc_pixel_SW_second_range, 7 );
-		vuc_pixel_SW_second_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 2 )),
-				vuc_pixel_SW_second_range, 11 );
-		vuc_pixel_SW_second_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 3 )),
-				vuc_pixel_SW_second_range, 15 );
-		// third range
-		vector unsigned char vuc_pixel_SW_third_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 0 )), null_vector, 3 );
-		vuc_pixel_SW_third_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 1 )),
-				vuc_pixel_SW_third_range, 7 );
-		vuc_pixel_SW_third_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 2 )),
-				vuc_pixel_SW_third_range, 11 );
-		vuc_pixel_SW_third_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 3 )),
-				vuc_pixel_SW_third_range, 15 );
-		// fourth range
-		vector unsigned char vuc_pixel_SW_fourth_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 0 )), null_vector, 3 );
-		vuc_pixel_SW_fourth_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 1 )),
-				vuc_pixel_SW_fourth_range, 7 );
-		vuc_pixel_SW_fourth_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 2 )),
-				vuc_pixel_SW_fourth_range, 11 );
-		vuc_pixel_SW_fourth_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 3 )),
-				vuc_pixel_SW_fourth_range, 15 );
-
-		// NORTH EAST
-		// first range
-		vector unsigned char vuc_pixel_SE_first_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 0 )), null_vector, 3 );
-		vuc_pixel_SE_first_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 1 )),
-				vuc_pixel_SE_first_range, 7 );
-		vuc_pixel_SE_first_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 2 )),
-				vuc_pixel_SE_first_range, 11 );
-		vuc_pixel_SE_first_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 3 )),
-				vuc_pixel_SE_first_range, 15 );
-		// second range
-		vector unsigned char vuc_pixel_SE_second_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 0 )), null_vector, 3 );
-		vuc_pixel_SE_second_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 1 )),
-				vuc_pixel_SE_second_range, 7 );
-		vuc_pixel_SE_second_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 2 )),
-				vuc_pixel_SE_second_range, 11 );
-		vuc_pixel_SE_second_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 3 )),
-				vuc_pixel_SE_second_range, 15 );
-		// third range
-		vector unsigned char vuc_pixel_SE_third_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 0 )), null_vector, 3 );
-		vuc_pixel_SE_third_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 1 )),
-				vuc_pixel_SE_third_range, 7 );
-		vuc_pixel_SE_third_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 2 )),
-				vuc_pixel_SE_third_range, 11 );
-		vuc_pixel_SE_third_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 3 )),
-				vuc_pixel_SE_third_range, 15 );
-		// fourth range
-		vector unsigned char vuc_pixel_SE_fourth_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 0 )), null_vector, 3 );
-		vuc_pixel_SE_fourth_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 1 )),
-				vuc_pixel_SE_fourth_range, 7 );
-		vuc_pixel_SE_fourth_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 2 )),
-				vuc_pixel_SE_fourth_range, 11 );
-		vuc_pixel_SE_fourth_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 3 )),
-				vuc_pixel_SE_fourth_range, 15 );
-
-
-
-		// convert to float
-		vector float vf_pixel_NW_first_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_first_range, 0 );
-		vector float vf_pixel_NW_second_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_second_range, 0 );
-		vector float vf_pixel_NW_third_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_third_range, 0 );
-		vector float vf_pixel_NW_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_fourth_range, 0 );
-
-		vector float vf_pixel_NE_first_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_first_range, 0 );
-		vector float vf_pixel_NE_second_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_second_range, 0 );
-		vector float vf_pixel_NE_third_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_third_range, 0 );
-		vector float vf_pixel_NE_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_fourth_range, 0 );
-
-		vector float vf_pixel_SW_first_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_first_range, 0 );
-		vector float vf_pixel_SW_second_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_second_range, 0 );
-		vector float vf_pixel_SW_third_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_third_range, 0 );
-		vector float vf_pixel_SW_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_fourth_range, 0 );
-
-		vector float vf_pixel_SE_first_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_first_range, 0 );
-		vector float vf_pixel_SE_second_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_second_range, 0 );
-		vector float vf_pixel_SE_third_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_third_range, 0 );
-		vector float vf_pixel_SE_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_fourth_range, 0 );
-
-		// first linear interpolation: EWtop
-		// EWtop = NW + EWweight*(NE-NW)
-		//
-		// first range
-		vector float vf_EWtop_first_range_tmp = spu_sub( vf_pixel_NE_first_range, vf_pixel_NW_first_range );
-		vector float vf_EWtop_first_range = spu_madd( vf_EWweight_first_range,
-								vf_EWtop_first_range_tmp,
-								vf_pixel_NW_first_range );
-
-		// second range
-		vector float vf_EWtop_second_range_tmp = spu_sub( vf_pixel_NE_second_range, vf_pixel_NW_second_range );
-		vector float vf_EWtop_second_range = spu_madd( vf_EWweight_second_range,
-								vf_EWtop_second_range_tmp,
-								vf_pixel_NW_second_range );
-
-		// third range
-		vector float vf_EWtop_third_range_tmp = spu_sub( vf_pixel_NE_third_range, vf_pixel_NW_third_range );
-		vector float vf_EWtop_third_range = spu_madd( vf_EWweight_third_range,
-								vf_EWtop_third_range_tmp,
-								vf_pixel_NW_third_range );
-
-		// fourth range
-		vector float vf_EWtop_fourth_range_tmp = spu_sub( vf_pixel_NE_fourth_range, vf_pixel_NW_fourth_range );
-		vector float vf_EWtop_fourth_range = spu_madd( vf_EWweight_fourth_range,
-								vf_EWtop_fourth_range_tmp,
-								vf_pixel_NW_fourth_range );
-
-
-
-		// second linear interpolation: EWbottom
-		// EWbottom = SW + EWweight*(SE-SW)
-		//
-		// first range
-		vector float vf_EWbottom_first_range_tmp = spu_sub( vf_pixel_SE_first_range, vf_pixel_SW_first_range );
-		vector float vf_EWbottom_first_range = spu_madd( vf_EWweight_first_range,
-								vf_EWbottom_first_range_tmp,
-								vf_pixel_SW_first_range );
-
-		// second range
-		vector float vf_EWbottom_second_range_tmp = spu_sub( vf_pixel_SE_second_range, vf_pixel_SW_second_range );
-		vector float vf_EWbottom_second_range = spu_madd( vf_EWweight_second_range,
-								vf_EWbottom_second_range_tmp,
-								vf_pixel_SW_second_range );
-		// first range
-		vector float vf_EWbottom_third_range_tmp = spu_sub( vf_pixel_SE_third_range, vf_pixel_SW_third_range );
-		vector float vf_EWbottom_third_range = spu_madd( vf_EWweight_third_range,
-								vf_EWbottom_third_range_tmp,
-								vf_pixel_SW_third_range );
-
-		// first range
-		vector float vf_EWbottom_fourth_range_tmp = spu_sub( vf_pixel_SE_fourth_range, vf_pixel_SW_fourth_range );
-		vector float vf_EWbottom_fourth_range = spu_madd( vf_EWweight_fourth_range,
-								vf_EWbottom_fourth_range_tmp,
-								vf_pixel_SW_fourth_range );
-
-
-
-		// third linear interpolation: the bilinear interpolated value
-		// result = EWtop + NSweight*(EWbottom-EWtop);
-		//
-		// first range
-		vector float vf_result_first_range_tmp = spu_sub( vf_EWbottom_first_range, vf_EWtop_first_range );
-		vector float vf_result_first_range = spu_madd( vf_NSweight,
-								vf_result_first_range_tmp,
-								vf_EWtop_first_range );
-
-		// second range
-		vector float vf_result_second_range_tmp = spu_sub( vf_EWbottom_second_range, vf_EWtop_second_range );
-		vector float vf_result_second_range = spu_madd( vf_NSweight,
-								vf_result_second_range_tmp,
-								vf_EWtop_second_range );
-
-		// third range
-		vector float vf_result_third_range_tmp = spu_sub( vf_EWbottom_third_range, vf_EWtop_third_range );
-		vector float vf_result_third_range = spu_madd( vf_NSweight,
-								vf_result_third_range_tmp,
-								vf_EWtop_third_range );
-
-		// fourth range
-		vector float vf_result_fourth_range_tmp = spu_sub( vf_EWbottom_fourth_range, vf_EWtop_fourth_range );
-		vector float vf_result_fourth_range = spu_madd( vf_NSweight,
-								vf_result_fourth_range_tmp,
-								vf_EWtop_fourth_range );
-
-
-
-		// convert back: using saturated arithmetic
-		vector unsigned int vui_result_first_range = vfloat_to_vuint( vf_result_first_range );
-		vector unsigned int vui_result_second_range = vfloat_to_vuint( vf_result_second_range );
-		vector unsigned int vui_result_third_range = vfloat_to_vuint( vf_result_third_range );
-		vector unsigned int vui_result_fourth_range = vfloat_to_vuint( vf_result_fourth_range );
-
-		// merge results->lower,upper
-		vector unsigned char vuc_mask_merge_result_first_second = { 0x03, 0x07, 0x0B, 0x0F,
-							       		    0x13, 0x17, 0x1B, 0x1F,
-							       		    0x00, 0x00, 0x00, 0x00,
-							       		    0x00, 0x00, 0x00, 0x00 };
-
-		vector unsigned char vuc_mask_merge_result_third_fourth = { 0x00, 0x00, 0x00, 0x00,
-							       		    0x00, 0x00, 0x00, 0x00,
-									    0x03, 0x07, 0x0B, 0x0F,
-							       		    0x13, 0x17, 0x1B, 0x1F };
-
-		vector unsigned char vuc_result_first_second =
-						spu_shuffle( (vector unsigned char) vui_result_first_range,
-								 (vector unsigned char) vui_result_second_range,
-								vuc_mask_merge_result_first_second );
-
-		vector unsigned char vuc_result_third_fourth =
-						spu_shuffle( (vector unsigned char) vui_result_third_range,
-								 (vector unsigned char) vui_result_fourth_range,
-								vuc_mask_merge_result_third_fourth );
-
-		// store result
-		*((vector unsigned char*)dst) = spu_or( vuc_result_first_second,
-							vuc_result_third_fourth );
-		dst += 16;
-	}
-}
-
-- 
cgit v1.2.3