From 6039eb05ba6d82ef56f2868c96654c552d117bf9 Mon Sep 17 00:00:00 2001 From: Franklin Wei Date: Wed, 7 Feb 2018 20:04:46 -0500 Subject: sdl: remove non-rockbox drivers We never use any of these other drivers, so having them around just takes up space. Change-Id: Iced812162df1fef3fd55522b7e700acb6c3bcd41 --- .../sdl/src/video/ps3/spulibs/bilin_scaler.c | 2050 -------------------- 1 file changed, 2050 deletions(-) delete mode 100644 apps/plugins/sdl/src/video/ps3/spulibs/bilin_scaler.c (limited to 'apps/plugins/sdl/src/video/ps3/spulibs/bilin_scaler.c') diff --git a/apps/plugins/sdl/src/video/ps3/spulibs/bilin_scaler.c b/apps/plugins/sdl/src/video/ps3/spulibs/bilin_scaler.c deleted file mode 100644 index be9b5c6e8d..0000000000 --- a/apps/plugins/sdl/src/video/ps3/spulibs/bilin_scaler.c +++ /dev/null @@ -1,2050 +0,0 @@ -/* - * SDL - Simple DirectMedia Layer - * CELL BE Support for PS3 Framebuffer - * Copyright (C) 2008, 2009 International Business Machines Corporation - * - * This library is free software; you can redistribute it and/or modify it - * under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; either version 2.1 of the License, or - * (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 - * USA - * - * Martin Lowinski - * Dirk Herrendoerfer - * SPE code based on research by: - * Rene Becker - * Thimo Emmerich - */ - -#include "spu_common.h" - -#include -#include - -// Debugging -//#define DEBUG - -#ifdef DEBUG -#define deprintf(fmt, args... 
) \ - fprintf( stdout, fmt, ##args ); \ - fflush( stdout ); -#else -#define deprintf( fmt, args... ) -#endif - -struct scale_parms_t parms __attribute__((aligned(128))); - -/* A maximum of 8 lines Y, therefore 4 lines V, 4 lines U are stored - * there might be the need to retrieve misaligned data, adjust - * incoming v and u plane to be able to handle this (add 128) - */ -unsigned char y_plane[2][(MAX_HDTV_WIDTH+128)*4] __attribute__((aligned(128))); -unsigned char v_plane[2][(MAX_HDTV_WIDTH+128)*2] __attribute__((aligned(128))); -unsigned char u_plane[2][(MAX_HDTV_WIDTH+128)*2] __attribute__((aligned(128))); - -/* temp-buffer for scaling: 4 lines Y, therefore 2 lines V, 2 lines U */ -unsigned char scaled_y_plane[2][MAX_HDTV_WIDTH*2] __attribute__((aligned(128))); -unsigned char scaled_v_plane[2][MAX_HDTV_WIDTH/2] __attribute__((aligned(128))); -unsigned char scaled_u_plane[2][MAX_HDTV_WIDTH/2] __attribute__((aligned(128))); - -/* some vectors needed by the float to int conversion */ -static const vector float vec_255 = { 255.0f, 255.0f, 255.0f, 255.0f }; -static const vector float vec_0_1 = { 0.1f, 0.1f, 0.1f, 0.1f }; - -void bilinear_scale_line_w8(unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride); -void bilinear_scale_line_w16(unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride); - -void scale_srcw16_dstw16(); -void scale_srcw16_dstw32(); -void scale_srcw32_dstw16(); -void scale_srcw32_dstw32(); - -int main( unsigned long long spe_id __attribute__((unused)), unsigned long long argp ) -{ - deprintf("[SPU] bilin_scaler_spu is up... 
(on SPE #%llu)\n", spe_id); - /* DMA transfer for the input parameters */ - spu_mfcdma32(&parms, (unsigned int)argp, sizeof(struct scale_parms_t), TAG_INIT, MFC_GET_CMD); - DMA_WAIT_TAG(TAG_INIT); - - deprintf("[SPU] Scale %ux%u to %ux%u\n", parms.src_pixel_width, parms.src_pixel_height, - parms.dst_pixel_width, parms.dst_pixel_height); - - if(parms.src_pixel_width & 0x1f) { - if(parms.dst_pixel_width & 0x1F) { - deprintf("[SPU] Using scale_srcw16_dstw16\n"); - scale_srcw16_dstw16(); - } else { - deprintf("[SPU] Using scale_srcw16_dstw32\n"); - scale_srcw16_dstw32(); - } - } else { - if(parms.dst_pixel_width & 0x1F) { - deprintf("[SPU] Using scale_srcw32_dstw16\n"); - scale_srcw32_dstw16(); - } else { - deprintf("[SPU] Using scale_srcw32_dstw32\n"); - scale_srcw32_dstw32(); - } - } - deprintf("[SPU] bilin_scaler_spu... done!\n"); - - return 0; -} - - -/* - * vfloat_to_vuint() - * - * converts a float vector to an unsinged int vector using saturated - * arithmetic - * - * @param vec_s float vector for conversion - * @returns converted unsigned int vector - */ -inline static vector unsigned int vfloat_to_vuint(vector float vec_s) { - vector unsigned int select_1 = spu_cmpgt(vec_0_1, vec_s); - vec_s = spu_sel(vec_s, vec_0_1, select_1); - - vector unsigned int select_2 = spu_cmpgt(vec_s, vec_255); - vec_s = spu_sel(vec_s, vec_255, select_2); - return spu_convtu(vec_s,0); -} - - -/* - * scale_srcw16_dstw16() - * - * processes an input image of width 16 - * scaling is done to a width 16 - * result stored in RAM - */ -void scale_srcw16_dstw16() { - // extract parameters - unsigned char* dst_addr = (unsigned char *)parms.dstBuffer; - - unsigned int src_width = parms.src_pixel_width; - unsigned int src_height = parms.src_pixel_height; - unsigned int dst_width = parms.dst_pixel_width; - unsigned int dst_height = parms.dst_pixel_height; - - // YVU - unsigned int src_linestride_y = src_width; - unsigned int src_dbl_linestride_y = src_width<<1; - unsigned int src_linestride_vu 
= src_width>>1; - unsigned int src_dbl_linestride_vu = src_width; - - // scaled YVU - unsigned int scaled_src_linestride_y = dst_width; - - // ram addresses - unsigned char* src_addr_y = parms.y_plane; - unsigned char* src_addr_v = parms.v_plane; - unsigned char* src_addr_u = parms.u_plane; - - // for handling misalignment, addresses are precalculated - unsigned char* precalc_src_addr_v = src_addr_v; - unsigned char* precalc_src_addr_u = src_addr_u; - - unsigned int dst_picture_size = dst_width*dst_height; - - // Sizes for destination - unsigned int dst_dbl_linestride_y = dst_width<<1; - unsigned int dst_dbl_linestride_vu = dst_width>>1; - - // Perform address calculation for Y, V and U in main memory with dst_addr as base - unsigned char* dst_addr_main_memory_y = dst_addr; - unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size; - unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2); - - // calculate scale factors - vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width ); - float y_scale = (float)src_height/(float)dst_height; - - // double buffered processing - // buffer switching - unsigned int curr_src_idx = 0; - unsigned int curr_dst_idx = 0; - unsigned int next_src_idx, next_dst_idx; - - // 2 lines y as output, upper and lowerline - unsigned int curr_interpl_y_upper = 0; - unsigned int next_interpl_y_upper; - unsigned int curr_interpl_y_lower, next_interpl_y_lower; - // only 1 line v/u output, both planes have the same dimension - unsigned int curr_interpl_vu = 0; - unsigned int next_interpl_vu; - - // weights, calculated in every loop iteration - vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f }; - vector float vf_next_NSweight_y_upper; - vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower; - vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f }; - vector float vf_next_NSweight_vu; - - // line indices for the src picture - float curr_src_y_upper = 0.0f, 
next_src_y_upper; - float curr_src_y_lower, next_src_y_lower; - float curr_src_vu = 0.0f, next_src_vu; - - // line indices for the dst picture - unsigned int dst_y=0, dst_vu=0; - - // offset for the v and u plane to handle misalignement - unsigned int curr_lsoff_v = 0, next_lsoff_v; - unsigned int curr_lsoff_u = 0, next_lsoff_u; - - // calculate lower line indices - curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale; - curr_interpl_y_lower = (unsigned int)curr_src_y_lower; - // lower line weight - vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower ); - - - // start partially double buffered processing - // get initial data, 2 sets of y, 1 set v, 1 set u - mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 ); - mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y, - (unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y), - src_dbl_linestride_y, - RETR_BUF, - 0, 0 ); - mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); - mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); - - /* iteration loop - * within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved - * the scaled output is 2 lines y, 1 line v, 1 line u - * the yuv2rgb-converted output is stored to RAM - */ - for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) { - dst_y = dst_vu<<1; - - // calculate next indices - next_src_vu = ((float)dst_vu+1)*y_scale; - next_src_y_upper = ((float)dst_y+2)*y_scale; - next_src_y_lower = ((float)dst_y+3)*y_scale; - - next_interpl_vu = (unsigned int) next_src_vu; - next_interpl_y_upper = (unsigned int) next_src_y_upper; - next_interpl_y_lower = (unsigned int) next_src_y_lower; - - // calculate weight NORTH-SOUTH - vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu ); - vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper ); - 
vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower ); - - // get next lines - next_src_idx = curr_src_idx^1; - next_dst_idx = curr_dst_idx^1; - - // 4 lines y - mfc_get( y_plane[next_src_idx], - (unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y), - src_dbl_linestride_y, - RETR_BUF+next_src_idx, - 0, 0 ); - mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y, - (unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y), - src_dbl_linestride_y, - RETR_BUF+next_src_idx, - 0, 0 ); - - // 2 lines v - precalc_src_addr_v = src_addr_v+(next_interpl_vu*src_linestride_vu); - next_lsoff_v = ((unsigned int)precalc_src_addr_v)&0x0F; - mfc_get( v_plane[next_src_idx], - ((unsigned int) precalc_src_addr_v)&0xFFFFFFF0, - src_dbl_linestride_vu+(next_lsoff_v<<1), - RETR_BUF+next_src_idx, - 0, 0 ); - // 2 lines u - precalc_src_addr_u = src_addr_u+(next_interpl_vu*src_linestride_vu); - next_lsoff_u = ((unsigned int)precalc_src_addr_u)&0x0F; - mfc_get( u_plane[next_src_idx], - ((unsigned int) precalc_src_addr_u)&0xFFFFFFF0, - src_dbl_linestride_vu+(next_lsoff_v<<1), - RETR_BUF+next_src_idx, - 0, 0 ); - - DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); - - // scaling - // work line y_upper - bilinear_scale_line_w16( y_plane[curr_src_idx], - scaled_y_plane[curr_src_idx], - dst_width, - vf_x_scale, - vf_curr_NSweight_y_upper, - src_linestride_y ); - // work line y_lower - bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, - scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, - dst_width, - vf_x_scale, - vf_curr_NSweight_y_lower, - src_linestride_y ); - // work line v - bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v, - scaled_v_plane[curr_src_idx], - dst_width>>1, - vf_x_scale, - vf_curr_NSweight_vu, - src_linestride_vu ); - // work line u - bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u, - scaled_u_plane[curr_src_idx], - dst_width>>1, - vf_x_scale, - vf_curr_NSweight_vu, - src_linestride_vu ); - - - 
// Store the result back to main memory into a destination buffer in YUV format - //--------------------------------------------------------------------------------------------- - DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); - - // Perform three DMA transfers to 3 different locations in the main memory! - // dst_width: Pixel width of destination image - // dst_addr: Destination address in main memory - // dst_vu: Counter which is incremented one by one - // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) - mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) - (unsigned int)dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) - dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) - (unsigned int)dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) - dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) - (unsigned int)dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) - dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - //--------------------------------------------------------------------------------------------- - - - // update for next cycle - curr_src_idx = next_src_idx; - curr_dst_idx = next_dst_idx; - - curr_interpl_y_upper = next_interpl_y_upper; - curr_interpl_y_lower = next_interpl_y_lower; - curr_interpl_vu = next_interpl_vu; - - vf_curr_NSweight_y_upper = vf_curr_NSweight_y_upper; - vf_curr_NSweight_y_lower = vf_curr_NSweight_y_lower; - vf_curr_NSweight_vu = vf_next_NSweight_vu; - - curr_src_y_upper = 
next_src_y_upper; - curr_src_y_lower = next_src_y_lower; - curr_src_vu = next_src_vu; - - curr_lsoff_v = next_lsoff_v; - curr_lsoff_u = next_lsoff_u; - } - - - - DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); - - // scaling - // work line y_upper - bilinear_scale_line_w16( y_plane[curr_src_idx], - scaled_y_plane[curr_src_idx], - dst_width, - vf_x_scale, - vf_curr_NSweight_y_upper, - src_linestride_y ); - // work line y_lower - bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, - scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, - dst_width, - vf_x_scale, - vf_curr_NSweight_y_lower, - src_linestride_y ); - // work line v - bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v, - scaled_v_plane[curr_src_idx], - dst_width>>1, - vf_x_scale, - vf_curr_NSweight_vu, - src_linestride_vu ); - // work line u - bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u, - scaled_u_plane[curr_src_idx], - dst_width>>1, - vf_x_scale, - vf_curr_NSweight_vu, - src_linestride_vu ); - - - // Store the result back to main memory into a destination buffer in YUV format - //--------------------------------------------------------------------------------------------- - DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); - - // Perform three DMA transfers to 3 different locations in the main memory! 
- // dst_width: Pixel width of destination image - // dst_addr: Destination address in main memory - // dst_vu: Counter which is incremented one by one - // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) - mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) - (unsigned int)dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) - dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) - (unsigned int)dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) - dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) - (unsigned int)dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) - dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - // wait for completion - DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); - //--------------------------------------------------------------------------------------------- -} - - -/* - * scale_srcw16_dstw32() - * - * processes an input image of width 16 - * scaling is done to a width 32 - * yuv2rgb conversion on a width of 32 - * result stored in RAM - */ -void scale_srcw16_dstw32() { - // extract parameters - unsigned char* dst_addr = (unsigned char *)parms.dstBuffer; - - unsigned int src_width = parms.src_pixel_width; - unsigned int src_height = parms.src_pixel_height; - unsigned int dst_width = parms.dst_pixel_width; - unsigned int dst_height = parms.dst_pixel_height; - - // YVU - unsigned int src_linestride_y = src_width; - unsigned int src_dbl_linestride_y = src_width<<1; - unsigned int src_linestride_vu 
= src_width>>1; - unsigned int src_dbl_linestride_vu = src_width; - // scaled YVU - unsigned int scaled_src_linestride_y = dst_width; - - // ram addresses - unsigned char* src_addr_y = parms.y_plane; - unsigned char* src_addr_v = parms.v_plane; - unsigned char* src_addr_u = parms.u_plane; - - unsigned int dst_picture_size = dst_width*dst_height; - - // Sizes for destination - unsigned int dst_dbl_linestride_y = dst_width<<1; - unsigned int dst_dbl_linestride_vu = dst_width>>1; - - // Perform address calculation for Y, V and U in main memory with dst_addr as base - unsigned char* dst_addr_main_memory_y = dst_addr; - unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size; - unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2); - - - // for handling misalignment, addresses are precalculated - unsigned char* precalc_src_addr_v = src_addr_v; - unsigned char* precalc_src_addr_u = src_addr_u; - - // calculate scale factors - vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width ); - float y_scale = (float)src_height/(float)dst_height; - - // double buffered processing - // buffer switching - unsigned int curr_src_idx = 0; - unsigned int curr_dst_idx = 0; - unsigned int next_src_idx, next_dst_idx; - - // 2 lines y as output, upper and lowerline - unsigned int curr_interpl_y_upper = 0; - unsigned int next_interpl_y_upper; - unsigned int curr_interpl_y_lower, next_interpl_y_lower; - // only 1 line v/u output, both planes have the same dimension - unsigned int curr_interpl_vu = 0; - unsigned int next_interpl_vu; - - // weights, calculated in every loop iteration - vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f }; - vector float vf_next_NSweight_y_upper; - vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower; - vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f }; - vector float vf_next_NSweight_vu; - - // line indices for the src picture - float curr_src_y_upper = 0.0f, 
next_src_y_upper; - float curr_src_y_lower, next_src_y_lower; - float curr_src_vu = 0.0f, next_src_vu; - - // line indices for the dst picture - unsigned int dst_y=0, dst_vu=0; - - // offset for the v and u plane to handle misalignement - unsigned int curr_lsoff_v = 0, next_lsoff_v; - unsigned int curr_lsoff_u = 0, next_lsoff_u; - - // calculate lower line idices - curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale; - curr_interpl_y_lower = (unsigned int)curr_src_y_lower; - // lower line weight - vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower ); - - - // start partially double buffered processing - // get initial data, 2 sets of y, 1 set v, 1 set u - mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 ); - mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y, - (unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y), - src_dbl_linestride_y, - RETR_BUF, - 0, 0 ); - mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); - mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); - - // iteration loop - // within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved - // the scaled output is 2 lines y, 1 line v, 1 line u - // the yuv2rgb-converted output is stored to RAM - for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) { - dst_y = dst_vu<<1; - - // calculate next indices - next_src_vu = ((float)dst_vu+1)*y_scale; - next_src_y_upper = ((float)dst_y+2)*y_scale; - next_src_y_lower = ((float)dst_y+3)*y_scale; - - next_interpl_vu = (unsigned int) next_src_vu; - next_interpl_y_upper = (unsigned int) next_src_y_upper; - next_interpl_y_lower = (unsigned int) next_src_y_lower; - - // calculate weight NORTH-SOUTH - vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu ); - vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper ); - 
vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower ); - - // get next lines - next_src_idx = curr_src_idx^1; - next_dst_idx = curr_dst_idx^1; - - // 4 lines y - mfc_get( y_plane[next_src_idx], - (unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y), - src_dbl_linestride_y, - RETR_BUF+next_src_idx, - 0, 0 ); - mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y, - (unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y), - src_dbl_linestride_y, - RETR_BUF+next_src_idx, - 0, 0 ); - - // 2 lines v - precalc_src_addr_v = src_addr_v+(next_interpl_vu*src_linestride_vu); - next_lsoff_v = ((unsigned int)precalc_src_addr_v)&0x0F; - mfc_get( v_plane[next_src_idx], - ((unsigned int) precalc_src_addr_v)&0xFFFFFFF0, - src_dbl_linestride_vu+(next_lsoff_v<<1), - RETR_BUF+next_src_idx, - 0, 0 ); - // 2 lines u - precalc_src_addr_u = src_addr_u+(next_interpl_vu*src_linestride_vu); - next_lsoff_u = ((unsigned int)precalc_src_addr_u)&0x0F; - mfc_get( u_plane[next_src_idx], - ((unsigned int) precalc_src_addr_u)&0xFFFFFFF0, - src_dbl_linestride_vu+(next_lsoff_v<<1), - RETR_BUF+next_src_idx, - 0, 0 ); - - DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); - - // scaling - // work line y_upper - bilinear_scale_line_w16( y_plane[curr_src_idx], - scaled_y_plane[curr_src_idx], - dst_width, - vf_x_scale, - vf_curr_NSweight_y_upper, - src_linestride_y ); - // work line y_lower - bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, - scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, - dst_width, - vf_x_scale, - vf_curr_NSweight_y_lower, - src_linestride_y ); - // work line v - bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v, - scaled_v_plane[curr_src_idx], - dst_width>>1, - vf_x_scale, - vf_curr_NSweight_vu, - src_linestride_vu ); - // work line u - bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u, - scaled_u_plane[curr_src_idx], - dst_width>>1, - vf_x_scale, - vf_curr_NSweight_vu, - src_linestride_vu ); - - 
//--------------------------------------------------------------------------------------------- - DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); - - // Perform three DMA transfers to 3 different locations in the main memory! - // dst_width: Pixel width of destination image - // dst_addr: Destination address in main memory - // dst_vu: Counter which is incremented one by one - // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) - - mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) - (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) - dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) - (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) - dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) - (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) - dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - //--------------------------------------------------------------------------------------------- - - - // update for next cycle - curr_src_idx = next_src_idx; - curr_dst_idx = next_dst_idx; - - curr_interpl_y_upper = next_interpl_y_upper; - curr_interpl_y_lower = next_interpl_y_lower; - curr_interpl_vu = next_interpl_vu; - - vf_curr_NSweight_y_upper = vf_curr_NSweight_y_upper; - vf_curr_NSweight_y_lower = vf_curr_NSweight_y_lower; - vf_curr_NSweight_vu = vf_next_NSweight_vu; - - curr_src_y_upper = next_src_y_upper; - curr_src_y_lower = next_src_y_lower; - curr_src_vu = next_src_vu; - 
- curr_lsoff_v = next_lsoff_v; - curr_lsoff_u = next_lsoff_u; - } - - - - DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); - - // scaling - // work line y_upper - bilinear_scale_line_w16( y_plane[curr_src_idx], - scaled_y_plane[curr_src_idx], - dst_width, - vf_x_scale, - vf_curr_NSweight_y_upper, - src_linestride_y ); - // work line y_lower - bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, - scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, - dst_width, - vf_x_scale, - vf_curr_NSweight_y_lower, - src_linestride_y ); - // work line v - bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v, - scaled_v_plane[curr_src_idx], - dst_width>>1, - vf_x_scale, - vf_curr_NSweight_vu, - src_linestride_vu ); - // work line u - bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u, - scaled_u_plane[curr_src_idx], - dst_width>>1, - vf_x_scale, - vf_curr_NSweight_vu, - src_linestride_vu ); - - //--------------------------------------------------------------------------------------------- - DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); - - // Perform three DMA transfers to 3 different locations in the main memory! 
- // dst_width: Pixel width of destination image - // dst_addr: Destination address in main memory - // dst_vu: Counter which is incremented one by one - // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) - - mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) - (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) - dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) - (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) - dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) - (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) - dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - // wait for completion - DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); - //--------------------------------------------------------------------------------------------- -} - - -/* - * scale_srcw32_dstw16() - * - * processes an input image of width 32 - * scaling is done to a width 16 - * yuv2rgb conversion on a width of 16 - * result stored in RAM - */ -void scale_srcw32_dstw16() { - // extract parameters - unsigned char* dst_addr = (unsigned char *)parms.dstBuffer; - - unsigned int src_width = parms.src_pixel_width; - unsigned int src_height = parms.src_pixel_height; - unsigned int dst_width = parms.dst_pixel_width; - unsigned int dst_height = parms.dst_pixel_height; - - // YVU - unsigned int src_linestride_y = src_width; - unsigned int src_dbl_linestride_y = src_width<<1; - unsigned int 
src_linestride_vu = src_width>>1; - unsigned int src_dbl_linestride_vu = src_width; - // scaled YVU - unsigned int scaled_src_linestride_y = dst_width; - - // ram addresses - unsigned char* src_addr_y = parms.y_plane; - unsigned char* src_addr_v = parms.v_plane; - unsigned char* src_addr_u = parms.u_plane; - - unsigned int dst_picture_size = dst_width*dst_height; - - // Sizes for destination - unsigned int dst_dbl_linestride_y = dst_width<<1; - unsigned int dst_dbl_linestride_vu = dst_width>>1; - - // Perform address calculation for Y, V and U in main memory with dst_addr as base - unsigned char* dst_addr_main_memory_y = dst_addr; - unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size; - unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2); - - // calculate scale factors - vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width ); - float y_scale = (float)src_height/(float)dst_height; - - // double buffered processing - // buffer switching - unsigned int curr_src_idx = 0; - unsigned int curr_dst_idx = 0; - unsigned int next_src_idx, next_dst_idx; - - // 2 lines y as output, upper and lowerline - unsigned int curr_interpl_y_upper = 0; - unsigned int next_interpl_y_upper; - unsigned int curr_interpl_y_lower, next_interpl_y_lower; - // only 1 line v/u output, both planes have the same dimension - unsigned int curr_interpl_vu = 0; - unsigned int next_interpl_vu; - - // weights, calculated in every loop iteration - vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f }; - vector float vf_next_NSweight_y_upper; - vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower; - vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f }; - vector float vf_next_NSweight_vu; - - // line indices for the src picture - float curr_src_y_upper = 0.0f, next_src_y_upper; - float curr_src_y_lower, next_src_y_lower; - float curr_src_vu = 0.0f, next_src_vu; - - // line indices for the dst picture - 
unsigned int dst_y=0, dst_vu=0; - - // calculate lower line idices - curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale; - curr_interpl_y_lower = (unsigned int)curr_src_y_lower; - // lower line weight - vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower ); - - - // start partially double buffered processing - // get initial data, 2 sets of y, 1 set v, 1 set u - mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 ); - mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y, - (unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y), - src_dbl_linestride_y, - RETR_BUF, - 0, 0 ); - mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); - mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); - - // iteration loop - // within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved - // the scaled output is 2 lines y, 1 line v, 1 line u - // the yuv2rgb-converted output is stored to RAM - for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) { - dst_y = dst_vu<<1; - - // calculate next indices - next_src_vu = ((float)dst_vu+1)*y_scale; - next_src_y_upper = ((float)dst_y+2)*y_scale; - next_src_y_lower = ((float)dst_y+3)*y_scale; - - next_interpl_vu = (unsigned int) next_src_vu; - next_interpl_y_upper = (unsigned int) next_src_y_upper; - next_interpl_y_lower = (unsigned int) next_src_y_lower; - - // calculate weight NORTH-SOUTH - vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu ); - vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper ); - vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower ); - - // get next lines - next_src_idx = curr_src_idx^1; - next_dst_idx = curr_dst_idx^1; - - // 4 lines y - mfc_get( y_plane[next_src_idx], - (unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y), - 
src_dbl_linestride_y, - RETR_BUF+next_src_idx, - 0, 0 ); - mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y, - (unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y), - src_dbl_linestride_y, - RETR_BUF+next_src_idx, - 0, 0 ); - - // 2 lines v - mfc_get( v_plane[next_src_idx], - (unsigned int) src_addr_v+(next_interpl_vu*src_linestride_vu), - src_dbl_linestride_vu, - RETR_BUF+next_src_idx, - 0, 0 ); - // 2 lines u - mfc_get( u_plane[next_src_idx], - (unsigned int) src_addr_u+(next_interpl_vu*src_linestride_vu), - src_dbl_linestride_vu, - RETR_BUF+next_src_idx, - 0, 0 ); - - DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); - - // scaling - // work line y_upper - bilinear_scale_line_w16( y_plane[curr_src_idx], - scaled_y_plane[curr_src_idx], - dst_width, - vf_x_scale, - vf_curr_NSweight_y_upper, - src_linestride_y ); - // work line y_lower - bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, - scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, - dst_width, - vf_x_scale, - vf_curr_NSweight_y_lower, - src_linestride_y ); - // work line v - bilinear_scale_line_w16( v_plane[curr_src_idx], - scaled_v_plane[curr_src_idx], - dst_width>>1, - vf_x_scale, - vf_curr_NSweight_vu, - src_linestride_vu ); - // work line u - bilinear_scale_line_w16( u_plane[curr_src_idx], - scaled_u_plane[curr_src_idx], - dst_width>>1, - vf_x_scale, - vf_curr_NSweight_vu, - src_linestride_vu ); - - //--------------------------------------------------------------------------------------------- - DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); - - // Perform three DMA transfers to 3 different locations in the main memory! 
- // dst_width: Pixel width of destination image - // dst_addr: Destination address in main memory - // dst_vu: Counter which is incremented one by one - // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) - - mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) - (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) - dst_dbl_linestride_y, // Two Y lines (depending on the width of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) - (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) - dst_dbl_linestride_vu, // Two V lines (depending on the width of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) - (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) - dst_dbl_linestride_vu, // Two U lines (depending on the width of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - //--------------------------------------------------------------------------------------------- - - - // update for next cycle - curr_src_idx = next_src_idx; - curr_dst_idx = next_dst_idx; - - curr_interpl_y_upper = next_interpl_y_upper; - curr_interpl_y_lower = next_interpl_y_lower; - curr_interpl_vu = next_interpl_vu; - - /* take over the NORTH-SOUTH weights pre-computed for the lines that were just prefetched */ - vf_curr_NSweight_y_upper = vf_next_NSweight_y_upper; - vf_curr_NSweight_y_lower = vf_next_NSweight_y_lower; - vf_curr_NSweight_vu = vf_next_NSweight_vu; - - curr_src_y_upper = next_src_y_upper; - curr_src_y_lower = next_src_y_lower; - curr_src_vu = next_src_vu; - } - - - - DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); - - // scaling - // work line y_upper - bilinear_scale_line_w16( y_plane[curr_src_idx], - scaled_y_plane[curr_src_idx], - dst_width, - vf_x_scale, - 
vf_curr_NSweight_y_upper, - src_linestride_y ); - // work line y_lower - bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, - scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, - dst_width, - vf_x_scale, - vf_curr_NSweight_y_lower, - src_linestride_y ); - // work line v - bilinear_scale_line_w16( v_plane[curr_src_idx], - scaled_v_plane[curr_src_idx], - dst_width>>1, - vf_x_scale, - vf_curr_NSweight_vu, - src_linestride_vu ); - // work line u - bilinear_scale_line_w16( u_plane[curr_src_idx], - scaled_u_plane[curr_src_idx], - dst_width>>1, - vf_x_scale, - vf_curr_NSweight_vu, - src_linestride_vu ); - - - //--------------------------------------------------------------------------------------------- - DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); - - // Perform three DMA transfers to 3 different locations in the main memory! - // dst_width: Pixel width of destination image - // dst_addr: Destination address in main memory - // dst_vu: Counter which is incremented one by one - // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) - - mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) - (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) - dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) - (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) - dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) - (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) - dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution) - 
STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - // wait for completion - DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); - //--------------------------------------------------------------------------------------------- -} - - -/** - * scale_srcw32_dstw32() - * - * processes an input image of width 32 - * scaling is done to a width 32 - * the scaled Y, V and U planes are written back to main memory via DMA, - * V is placed behind the Y plane and U behind V (no yuv2rgb conversion happens here) - * all parameters are read from the global parms structure - */ -void scale_srcw32_dstw32() { - // extract parameters - unsigned char* dst_addr = (unsigned char *)parms.dstBuffer; - - unsigned int src_width = parms.src_pixel_width; - unsigned int src_height = parms.src_pixel_height; - unsigned int dst_width = parms.dst_pixel_width; - unsigned int dst_height = parms.dst_pixel_height; - - // YVU - unsigned int src_linestride_y = src_width; - unsigned int src_dbl_linestride_y = src_width<<1; - unsigned int src_linestride_vu = src_width>>1; - unsigned int src_dbl_linestride_vu = src_width; - - // scaled YVU - unsigned int scaled_src_linestride_y = dst_width; - - // ram addresses - unsigned char* src_addr_y = parms.y_plane; - unsigned char* src_addr_v = parms.v_plane; - unsigned char* src_addr_u = parms.u_plane; - - unsigned int dst_picture_size = dst_width*dst_height; - - // Sizes for destination - unsigned int dst_dbl_linestride_y = dst_width<<1; - unsigned int dst_dbl_linestride_vu = dst_width>>1; - - // Perform address calculation for Y, V and U in main memory with dst_addr as base - unsigned char* dst_addr_main_memory_y = dst_addr; - unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size; - unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2); - - // calculate scale factors - vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width ); - float y_scale = (float)src_height/(float)dst_height; - - // double buffered processing - // buffer switching - unsigned int curr_src_idx = 0; - unsigned int curr_dst_idx = 0; - unsigned int next_src_idx, next_dst_idx; - - // 2 lines y as output, 
upper and lowerline - unsigned int curr_interpl_y_upper = 0; - unsigned int next_interpl_y_upper; - unsigned int curr_interpl_y_lower, next_interpl_y_lower; - // only 1 line v/u output, both planes have the same dimension - unsigned int curr_interpl_vu = 0; - unsigned int next_interpl_vu; - - // weights, calculated in every loop iteration - vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f }; - vector float vf_next_NSweight_y_upper; - vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower; - vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f }; - vector float vf_next_NSweight_vu; - - // line indices for the src picture - float curr_src_y_upper = 0.0f, next_src_y_upper; - float curr_src_y_lower, next_src_y_lower; - float curr_src_vu = 0.0f, next_src_vu; - - // line indices for the dst picture - unsigned int dst_y=0, dst_vu=0; - - // calculate lower line idices - curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale; - curr_interpl_y_lower = (unsigned int)curr_src_y_lower; - // lower line weight - vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower ); - - - // start partially double buffered processing - // get initial data, 2 sets of y, 1 set v, 1 set u - mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 ); - mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y, - (unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y), - src_dbl_linestride_y, - RETR_BUF, - 0, 0 ); - mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); - mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); - - // iteration loop - // within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved - // the scaled output is 2 lines y, 1 line v, 1 line u - // the yuv2rgb-converted output is stored to RAM - for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) { - dst_y = dst_vu<<1; - - 
// calculate next indices - next_src_vu = ((float)dst_vu+1)*y_scale; - next_src_y_upper = ((float)dst_y+2)*y_scale; - next_src_y_lower = ((float)dst_y+3)*y_scale; - - next_interpl_vu = (unsigned int) next_src_vu; - next_interpl_y_upper = (unsigned int) next_src_y_upper; - next_interpl_y_lower = (unsigned int) next_src_y_lower; - - // calculate weight NORTH-SOUTH - vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu ); - vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper ); - vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower ); - - // get next lines - next_src_idx = curr_src_idx^1; - next_dst_idx = curr_dst_idx^1; - - // 4 lines y - mfc_get( y_plane[next_src_idx], - (unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y), - src_dbl_linestride_y, - RETR_BUF+next_src_idx, - 0, 0 ); - mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y, - (unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y), - src_dbl_linestride_y, - RETR_BUF+next_src_idx, - 0, 0 ); - - // 2 lines v - mfc_get( v_plane[next_src_idx], - (unsigned int) src_addr_v+(next_interpl_vu*src_linestride_vu), - src_dbl_linestride_vu, - RETR_BUF+next_src_idx, - 0, 0 ); - // 2 lines u - mfc_get( u_plane[next_src_idx], - (unsigned int) src_addr_u+(next_interpl_vu*src_linestride_vu), - src_dbl_linestride_vu, - RETR_BUF+next_src_idx, - 0, 0 ); - - DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); - - // scaling - // work line y_upper - bilinear_scale_line_w16( y_plane[curr_src_idx], - scaled_y_plane[curr_src_idx], - dst_width, - vf_x_scale, - vf_curr_NSweight_y_upper, - src_linestride_y ); - // work line y_lower - bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, - scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, - dst_width, - vf_x_scale, - vf_curr_NSweight_y_lower, - src_linestride_y ); - // work line v - bilinear_scale_line_w16( v_plane[curr_src_idx], - scaled_v_plane[curr_src_idx], - 
dst_width>>1, - vf_x_scale, - vf_curr_NSweight_vu, - src_linestride_vu ); - // work line u - bilinear_scale_line_w16( u_plane[curr_src_idx], - scaled_u_plane[curr_src_idx], - dst_width>>1, - vf_x_scale, - vf_curr_NSweight_vu, - src_linestride_vu ); - - - - // Store the result back to main memory into a destination buffer in YUV format - //--------------------------------------------------------------------------------------------- - DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); - - // Perform three DMA transfers to 3 different locations in the main memory! - // dst_width: Pixel width of destination image - // dst_addr: Destination address in main memory - // dst_vu: Counter which is incremented one by one - // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) - - mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) - (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) - dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) - (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) - dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) - (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) - dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - //--------------------------------------------------------------------------------------------- - - - // update for next cycle - curr_src_idx = next_src_idx; - curr_dst_idx = next_dst_idx; - - curr_interpl_y_upper = next_interpl_y_upper; - 
curr_interpl_y_lower = next_interpl_y_lower; - curr_interpl_vu = next_interpl_vu; - - /* take over the NORTH-SOUTH weights pre-computed for the lines that were just prefetched */ - vf_curr_NSweight_y_upper = vf_next_NSweight_y_upper; - vf_curr_NSweight_y_lower = vf_next_NSweight_y_lower; - vf_curr_NSweight_vu = vf_next_NSweight_vu; - - curr_src_y_upper = next_src_y_upper; - curr_src_y_lower = next_src_y_lower; - curr_src_vu = next_src_vu; - } - - - - DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); - - // scaling - // work line y_upper - bilinear_scale_line_w16( y_plane[curr_src_idx], - scaled_y_plane[curr_src_idx], - dst_width, - vf_x_scale, - vf_curr_NSweight_y_upper, - src_linestride_y ); - // work line y_lower - bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, - scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, - dst_width, - vf_x_scale, - vf_curr_NSweight_y_lower, - src_linestride_y ); - // work line v - bilinear_scale_line_w16( v_plane[curr_src_idx], - scaled_v_plane[curr_src_idx], - dst_width>>1, - vf_x_scale, - vf_curr_NSweight_vu, - src_linestride_vu ); - // work line u - bilinear_scale_line_w16( u_plane[curr_src_idx], - scaled_u_plane[curr_src_idx], - dst_width>>1, - vf_x_scale, - vf_curr_NSweight_vu, - src_linestride_vu ); - - - // Store the result back to main memory into a destination buffer in YUV format - //--------------------------------------------------------------------------------------------- - DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); - - // Perform three DMA transfers to 3 different locations in the main memory! 
- // dst_width: Pixel width of destination image - // dst_addr: Destination address in main memory - // dst_vu: Counter which is incremented one by one - // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) - - mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) - (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) - dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) - (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) - dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) - (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) - dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution) - STR_BUF+curr_dst_idx, // Tag - 0, 0 ); - - // wait for completion - DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); - //--------------------------------------------------------------------------------------------- -} - - -/* - * bilinear_scale_line_w8() - * - * processes a line of yuv-input, width has to be a multiple of 8 - * scaled yuv-output is written to local store buffer - * - * @param src buffer for 2 lines input - * @param dst_ buffer for 1 line output - * @param dst_width the width of the destination line - * @param vf_x_scale a float vector, at each entry is the x_scale-factor - * @param vf_NSweight a float vector, at each position is the weight NORTH/SOUTH for the current line - * @param src_linestride the stride of the srcline - */ -void bilinear_scale_line_w8( unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector 
float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride ) { - - unsigned char* dst = dst_; - - unsigned int dst_x; - for( dst_x=0; dst_xfirst 4 pixel - // upper range->next 4 pixel - vector unsigned int vui_inc_dst_x_lower_range = { 0, 1, 2, 3 }; - vector unsigned int vui_inc_dst_x_upper_range = { 4, 5, 6, 7 }; - vector unsigned int vui_dst_x_lower_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_lower_range ); - vector unsigned int vui_dst_x_upper_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_upper_range ); - - // calculate weight EAST-WEST - vector float vf_dst_x_lower_range = spu_convtf( vui_dst_x_lower_range, 0 ); - vector float vf_dst_x_upper_range = spu_convtf( vui_dst_x_upper_range, 0 ); - vector float vf_src_x_lower_range = spu_mul( vf_dst_x_lower_range, vf_x_scale ); - vector float vf_src_x_upper_range = spu_mul( vf_dst_x_upper_range, vf_x_scale ); - vector unsigned int vui_interpl_x_lower_range = spu_convtu( vf_src_x_lower_range, 0 ); - vector unsigned int vui_interpl_x_upper_range = spu_convtu( vf_src_x_upper_range, 0 ); - vector float vf_interpl_x_lower_range = spu_convtf( vui_interpl_x_lower_range, 0 ); - vector float vf_interpl_x_upper_range = spu_convtf( vui_interpl_x_upper_range, 0 ); - vector float vf_EWweight_lower_range = spu_sub( vf_src_x_lower_range, vf_interpl_x_lower_range ); - vector float vf_EWweight_upper_range = spu_sub( vf_src_x_upper_range, vf_interpl_x_upper_range ); - - // calculate address offset - // - // pixel NORTH WEST - vector unsigned int vui_off_pixelNW_lower_range = vui_interpl_x_lower_range; - vector unsigned int vui_off_pixelNW_upper_range = vui_interpl_x_upper_range; - - // pixel NORTH EAST-->(offpixelNW+1) - vector unsigned int vui_add_1 = { 1, 1, 1, 1 }; - vector unsigned int vui_off_pixelNE_lower_range = spu_add( vui_off_pixelNW_lower_range, vui_add_1 ); - vector unsigned int vui_off_pixelNE_upper_range = spu_add( vui_off_pixelNW_upper_range, vui_add_1 ); - - // SOUTH-WEST-->(offpixelNW+src_linestride) - 
vector unsigned int vui_srclinestride = spu_splats( src_linestride ); - vector unsigned int vui_off_pixelSW_lower_range = spu_add( vui_srclinestride, vui_off_pixelNW_lower_range ); - vector unsigned int vui_off_pixelSW_upper_range = spu_add( vui_srclinestride, vui_off_pixelNW_upper_range ); - - // SOUTH-EAST-->(offpixelNW+src_linestride+1) - vector unsigned int vui_off_pixelSE_lower_range = spu_add( vui_srclinestride, vui_off_pixelNE_lower_range ); - vector unsigned int vui_off_pixelSE_upper_range = spu_add( vui_srclinestride, vui_off_pixelNE_upper_range ); - - // calculate each address - vector unsigned int vui_src_ls = spu_splats( (unsigned int) src ); - vector unsigned int vui_addr_pixelNW_lower_range = spu_add( vui_src_ls, vui_off_pixelNW_lower_range ); - vector unsigned int vui_addr_pixelNW_upper_range = spu_add( vui_src_ls, vui_off_pixelNW_upper_range ); - vector unsigned int vui_addr_pixelNE_lower_range = spu_add( vui_src_ls, vui_off_pixelNE_lower_range ); - vector unsigned int vui_addr_pixelNE_upper_range = spu_add( vui_src_ls, vui_off_pixelNE_upper_range ); - - vector unsigned int vui_addr_pixelSW_lower_range = spu_add( vui_src_ls, vui_off_pixelSW_lower_range ); - vector unsigned int vui_addr_pixelSW_upper_range = spu_add( vui_src_ls, vui_off_pixelSW_upper_range ); - vector unsigned int vui_addr_pixelSE_lower_range = spu_add( vui_src_ls, vui_off_pixelSE_lower_range ); - vector unsigned int vui_addr_pixelSE_upper_range = spu_add( vui_src_ls, vui_off_pixelSE_upper_range ); - - // get each pixel - // - // scalar load, afterwards insertion into the right position - // NORTH WEST - vector unsigned char null_vector = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - vector unsigned char vuc_pixel_NW_lower_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 0 )), null_vector, 3 ); - vuc_pixel_NW_lower_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 1 )), - vuc_pixel_NW_lower_range, 7 ); - 
vuc_pixel_NW_lower_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 2 )), - vuc_pixel_NW_lower_range, 11 ); - vuc_pixel_NW_lower_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 3 )), - vuc_pixel_NW_lower_range, 15 ); - - vector unsigned char vuc_pixel_NW_upper_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 0 )), null_vector, 3 ); - vuc_pixel_NW_upper_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 1 )), - vuc_pixel_NW_upper_range, 7 ); - vuc_pixel_NW_upper_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 2 )), - vuc_pixel_NW_upper_range, 11 ); - vuc_pixel_NW_upper_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 3 )), - vuc_pixel_NW_upper_range, 15 ); - - // NORTH EAST - vector unsigned char vuc_pixel_NE_lower_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 0 )), null_vector, 3 ); - vuc_pixel_NE_lower_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 1 )), - vuc_pixel_NE_lower_range, 7 ); - vuc_pixel_NE_lower_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 2 )), - vuc_pixel_NE_lower_range, 11 ); - vuc_pixel_NE_lower_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 3 )), - vuc_pixel_NE_lower_range, 15 ); - - vector unsigned char vuc_pixel_NE_upper_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 0 )), null_vector, 3 ); - vuc_pixel_NE_upper_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 1 )), - vuc_pixel_NE_upper_range, 7 ); - vuc_pixel_NE_upper_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 2 )), - vuc_pixel_NE_upper_range, 11 ); - vuc_pixel_NE_upper_range = spu_insert( - *((unsigned char*) spu_extract( 
vui_addr_pixelNE_upper_range, 3 )), - vuc_pixel_NE_upper_range, 15 ); - - - // SOUTH WEST - vector unsigned char vuc_pixel_SW_lower_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 0 )), null_vector, 3 ); - vuc_pixel_SW_lower_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 1 )), - vuc_pixel_SW_lower_range, 7 ); - vuc_pixel_SW_lower_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 2 )), - vuc_pixel_SW_lower_range, 11 ); - vuc_pixel_SW_lower_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 3 )), - vuc_pixel_SW_lower_range, 15 ); - - vector unsigned char vuc_pixel_SW_upper_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 0 )), null_vector, 3 ); - vuc_pixel_SW_upper_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 1 )), - vuc_pixel_SW_upper_range, 7 ); - vuc_pixel_SW_upper_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 2 )), - vuc_pixel_SW_upper_range, 11 ); - vuc_pixel_SW_upper_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 3 )), - vuc_pixel_SW_upper_range, 15 ); - - // SOUTH EAST - vector unsigned char vuc_pixel_SE_lower_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 0 )), null_vector, 3 ); - vuc_pixel_SE_lower_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 1 )), - vuc_pixel_SE_lower_range, 7 ); - vuc_pixel_SE_lower_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 2 )), - vuc_pixel_SE_lower_range, 11 ); - vuc_pixel_SE_lower_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 3 )), - vuc_pixel_SE_lower_range, 15 ); - - vector unsigned char vuc_pixel_SE_upper_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 0 )), 
null_vector, 3 ); - vuc_pixel_SE_upper_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 1 )), - vuc_pixel_SE_upper_range, 7 ); - vuc_pixel_SE_upper_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 2 )), - vuc_pixel_SE_upper_range, 11 ); - vuc_pixel_SE_upper_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 3 )), - vuc_pixel_SE_upper_range, 15 ); - - - // convert to float - vector float vf_pixel_NW_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_lower_range, 0 ); - vector float vf_pixel_NW_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_upper_range, 0 ); - - vector float vf_pixel_SW_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_lower_range, 0 ); - vector float vf_pixel_SW_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_upper_range, 0 ); - - vector float vf_pixel_NE_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_lower_range, 0 ); - vector float vf_pixel_NE_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_upper_range, 0 ); - - vector float vf_pixel_SE_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_lower_range, 0 ); - vector float vf_pixel_SE_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_upper_range, 0 ); - - - - // first linear interpolation: EWtop - // EWtop = NW + EWweight*(NE-NW) - // - // lower range - vector float vf_EWtop_lower_range_tmp = spu_sub( vf_pixel_NE_lower_range, vf_pixel_NW_lower_range ); - vector float vf_EWtop_lower_range = spu_madd( vf_EWweight_lower_range, - vf_EWtop_lower_range_tmp, - vf_pixel_NW_lower_range ); - - // upper range - vector float vf_EWtop_upper_range_tmp = spu_sub( vf_pixel_NE_upper_range, vf_pixel_NW_upper_range ); - vector float vf_EWtop_upper_range = spu_madd( vf_EWweight_upper_range, - vf_EWtop_upper_range_tmp, - vf_pixel_NW_upper_range ); - - - - // second linear interpolation: EWbottom - // EWbottom = SW + 
EWweight*(SE-SW) - // - // lower range - vector float vf_EWbottom_lower_range_tmp = spu_sub( vf_pixel_SE_lower_range, vf_pixel_SW_lower_range ); - vector float vf_EWbottom_lower_range = spu_madd( vf_EWweight_lower_range, - vf_EWbottom_lower_range_tmp, - vf_pixel_SW_lower_range ); - - // upper range - vector float vf_EWbottom_upper_range_tmp = spu_sub( vf_pixel_SE_upper_range, vf_pixel_SW_upper_range ); - vector float vf_EWbottom_upper_range = spu_madd( vf_EWweight_upper_range, - vf_EWbottom_upper_range_tmp, - vf_pixel_SW_upper_range ); - - - - // third linear interpolation: the bilinear interpolated value - // result = EWtop + NSweight*(EWbottom-EWtop); - // - // lower range - vector float vf_result_lower_range_tmp = spu_sub( vf_EWbottom_lower_range, vf_EWtop_lower_range ); - vector float vf_result_lower_range = spu_madd( vf_NSweight, - vf_result_lower_range_tmp, - vf_EWtop_lower_range ); - - // upper range - vector float vf_result_upper_range_tmp = spu_sub( vf_EWbottom_upper_range, vf_EWtop_upper_range ); - vector float vf_result_upper_range = spu_madd( vf_NSweight, - vf_result_upper_range_tmp, - vf_EWtop_upper_range ); - - - // convert back: using saturated arithmetic - vector unsigned int vui_result_lower_range = vfloat_to_vuint( vf_result_lower_range ); - vector unsigned int vui_result_upper_range = vfloat_to_vuint( vf_result_upper_range ); - - // merge results->lower,upper - vector unsigned char vuc_mask_merge_result = { 0x03, 0x07, 0x0B, 0x0F, - 0x13, 0x17, 0x1B, 0x1F, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00 }; - - vector unsigned char vuc_result = spu_shuffle( (vector unsigned char) vui_result_lower_range, - (vector unsigned char) vui_result_upper_range, - vuc_mask_merge_result ); - - // partial storing - vector unsigned char vuc_mask_out = { 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF }; - - - // get currently stored data - vector unsigned char vuc_orig = *((vector unsigned char*)dst); - - 
// clear currently stored data - vuc_orig = spu_and( vuc_orig, - spu_rlqwbyte( vuc_mask_out, ((unsigned int)dst)&0x0F) ); - - // rotate result according to storing address - vuc_result = spu_rlqwbyte( vuc_result, ((unsigned int)dst)&0x0F ); - - // store result - *((vector unsigned char*)dst) = spu_or( vuc_result, - vuc_orig ); - dst += 8; - } -} - - -/* - * bilinear_scale_line_w16() - * - * processes a line of yuv-input, width has to be a multiple of 16 - * scaled yuv-output is written to local store buffer - * - * @param src buffer for 2 lines input - * @param dst_ buffer for 1 line output - * @param dst_width the width of the destination line - * @param vf_x_scale a float vector, at each entry is the x_scale-factor - * @param vf_NSweight a float vector, at each position is the weight NORTH/SOUTH for the current line - * @param src_linestride the stride of the srcline - */ -void bilinear_scale_line_w16( unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride ) { - - unsigned char* dst = dst_; - - unsigned int dst_x; - for( dst_x=0; dst_xpixel 1 2 3 4 - // second range->pixel 5 6 7 8 - // third range->pixel 9 10 11 12 - // fourth range->pixel 13 14 15 16 - vector unsigned int vui_inc_dst_x_first_range = { 0, 1, 2, 3 }; - vector unsigned int vui_inc_dst_x_second_range = { 4, 5, 6, 7 }; - vector unsigned int vui_inc_dst_x_third_range = { 8, 9, 10, 11 }; - vector unsigned int vui_inc_dst_x_fourth_range = { 12, 13, 14, 15 }; - vector unsigned int vui_dst_x_first_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_first_range ); - vector unsigned int vui_dst_x_second_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_second_range ); - vector unsigned int vui_dst_x_third_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_third_range ); - vector unsigned int vui_dst_x_fourth_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_fourth_range ); - - // calculate weight EAST-WEST - vector float 
vf_dst_x_first_range = spu_convtf( vui_dst_x_first_range, 0 ); - vector float vf_dst_x_second_range = spu_convtf( vui_dst_x_second_range, 0 ); - vector float vf_dst_x_third_range = spu_convtf( vui_dst_x_third_range, 0 ); - vector float vf_dst_x_fourth_range = spu_convtf( vui_dst_x_fourth_range, 0 ); - vector float vf_src_x_first_range = spu_mul( vf_dst_x_first_range, vf_x_scale ); - vector float vf_src_x_second_range = spu_mul( vf_dst_x_second_range, vf_x_scale ); - vector float vf_src_x_third_range = spu_mul( vf_dst_x_third_range, vf_x_scale ); - vector float vf_src_x_fourth_range = spu_mul( vf_dst_x_fourth_range, vf_x_scale ); - vector unsigned int vui_interpl_x_first_range = spu_convtu( vf_src_x_first_range, 0 ); - vector unsigned int vui_interpl_x_second_range = spu_convtu( vf_src_x_second_range, 0 ); - vector unsigned int vui_interpl_x_third_range = spu_convtu( vf_src_x_third_range, 0 ); - vector unsigned int vui_interpl_x_fourth_range = spu_convtu( vf_src_x_fourth_range, 0 ); - vector float vf_interpl_x_first_range = spu_convtf( vui_interpl_x_first_range, 0 ); - vector float vf_interpl_x_second_range = spu_convtf( vui_interpl_x_second_range, 0 ); - vector float vf_interpl_x_third_range = spu_convtf( vui_interpl_x_third_range, 0 ); - vector float vf_interpl_x_fourth_range = spu_convtf( vui_interpl_x_fourth_range, 0 ); - vector float vf_EWweight_first_range = spu_sub( vf_src_x_first_range, vf_interpl_x_first_range ); - vector float vf_EWweight_second_range = spu_sub( vf_src_x_second_range, vf_interpl_x_second_range ); - vector float vf_EWweight_third_range = spu_sub( vf_src_x_third_range, vf_interpl_x_third_range ); - vector float vf_EWweight_fourth_range = spu_sub( vf_src_x_fourth_range, vf_interpl_x_fourth_range ); - - // calculate address offset - // - // pixel NORTH WEST - vector unsigned int vui_off_pixelNW_first_range = vui_interpl_x_first_range; - vector unsigned int vui_off_pixelNW_second_range = vui_interpl_x_second_range; - vector unsigned int 
vui_off_pixelNW_third_range = vui_interpl_x_third_range; - vector unsigned int vui_off_pixelNW_fourth_range = vui_interpl_x_fourth_range; - - // pixel NORTH EAST-->(offpixelNW+1) - vector unsigned int vui_add_1 = { 1, 1, 1, 1 }; - vector unsigned int vui_off_pixelNE_first_range = spu_add( vui_off_pixelNW_first_range, vui_add_1 ); - vector unsigned int vui_off_pixelNE_second_range = spu_add( vui_off_pixelNW_second_range, vui_add_1 ); - vector unsigned int vui_off_pixelNE_third_range = spu_add( vui_off_pixelNW_third_range, vui_add_1 ); - vector unsigned int vui_off_pixelNE_fourth_range = spu_add( vui_off_pixelNW_fourth_range, vui_add_1 ); - - // SOUTH-WEST-->(offpixelNW+src_linestride) - vector unsigned int vui_srclinestride = spu_splats( src_linestride ); - vector unsigned int vui_off_pixelSW_first_range = spu_add( vui_srclinestride, vui_off_pixelNW_first_range ); - vector unsigned int vui_off_pixelSW_second_range = spu_add( vui_srclinestride, vui_off_pixelNW_second_range ); - vector unsigned int vui_off_pixelSW_third_range = spu_add( vui_srclinestride, vui_off_pixelNW_third_range ); - vector unsigned int vui_off_pixelSW_fourth_range = spu_add( vui_srclinestride, vui_off_pixelNW_fourth_range ); - - // SOUTH-EAST-->(offpixelNW+src_linestride+1) - vector unsigned int vui_off_pixelSE_first_range = spu_add( vui_srclinestride, vui_off_pixelNE_first_range ); - vector unsigned int vui_off_pixelSE_second_range = spu_add( vui_srclinestride, vui_off_pixelNE_second_range ); - vector unsigned int vui_off_pixelSE_third_range = spu_add( vui_srclinestride, vui_off_pixelNE_third_range ); - vector unsigned int vui_off_pixelSE_fourth_range = spu_add( vui_srclinestride, vui_off_pixelNE_fourth_range ); - - // calculate each address - vector unsigned int vui_src_ls = spu_splats( (unsigned int) src ); - vector unsigned int vui_addr_pixelNW_first_range = spu_add( vui_src_ls, vui_off_pixelNW_first_range ); - vector unsigned int vui_addr_pixelNW_second_range = spu_add( vui_src_ls, 
vui_off_pixelNW_second_range ); - vector unsigned int vui_addr_pixelNW_third_range = spu_add( vui_src_ls, vui_off_pixelNW_third_range ); - vector unsigned int vui_addr_pixelNW_fourth_range = spu_add( vui_src_ls, vui_off_pixelNW_fourth_range ); - - vector unsigned int vui_addr_pixelNE_first_range = spu_add( vui_src_ls, vui_off_pixelNE_first_range ); - vector unsigned int vui_addr_pixelNE_second_range = spu_add( vui_src_ls, vui_off_pixelNE_second_range ); - vector unsigned int vui_addr_pixelNE_third_range = spu_add( vui_src_ls, vui_off_pixelNE_third_range ); - vector unsigned int vui_addr_pixelNE_fourth_range = spu_add( vui_src_ls, vui_off_pixelNE_fourth_range ); - - vector unsigned int vui_addr_pixelSW_first_range = spu_add( vui_src_ls, vui_off_pixelSW_first_range ); - vector unsigned int vui_addr_pixelSW_second_range = spu_add( vui_src_ls, vui_off_pixelSW_second_range ); - vector unsigned int vui_addr_pixelSW_third_range = spu_add( vui_src_ls, vui_off_pixelSW_third_range ); - vector unsigned int vui_addr_pixelSW_fourth_range = spu_add( vui_src_ls, vui_off_pixelSW_fourth_range ); - - vector unsigned int vui_addr_pixelSE_first_range = spu_add( vui_src_ls, vui_off_pixelSE_first_range ); - vector unsigned int vui_addr_pixelSE_second_range = spu_add( vui_src_ls, vui_off_pixelSE_second_range ); - vector unsigned int vui_addr_pixelSE_third_range = spu_add( vui_src_ls, vui_off_pixelSE_third_range ); - vector unsigned int vui_addr_pixelSE_fourth_range = spu_add( vui_src_ls, vui_off_pixelSE_fourth_range ); - - - // get each pixel - // - // scalar load, afterwards insertion into the right position - // NORTH WEST - // first range - vector unsigned char null_vector = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - vector unsigned char vuc_pixel_NW_first_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 0 )), null_vector, 3 ); - vuc_pixel_NW_first_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 1 )), - 
vuc_pixel_NW_first_range, 7 ); - vuc_pixel_NW_first_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 2 )), - vuc_pixel_NW_first_range, 11 ); - vuc_pixel_NW_first_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 3 )), - vuc_pixel_NW_first_range, 15 ); - // second range - vector unsigned char vuc_pixel_NW_second_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 0 )), null_vector, 3 ); - vuc_pixel_NW_second_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 1 )), - vuc_pixel_NW_second_range, 7 ); - vuc_pixel_NW_second_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 2 )), - vuc_pixel_NW_second_range, 11 ); - vuc_pixel_NW_second_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 3 )), - vuc_pixel_NW_second_range, 15 ); - // third range - vector unsigned char vuc_pixel_NW_third_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 0 )), null_vector, 3 ); - vuc_pixel_NW_third_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 1 )), - vuc_pixel_NW_third_range, 7 ); - vuc_pixel_NW_third_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 2 )), - vuc_pixel_NW_third_range, 11 ); - vuc_pixel_NW_third_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 3 )), - vuc_pixel_NW_third_range, 15 ); - // fourth range - vector unsigned char vuc_pixel_NW_fourth_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 0 )), null_vector, 3 ); - vuc_pixel_NW_fourth_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 1 )), - vuc_pixel_NW_fourth_range, 7 ); - vuc_pixel_NW_fourth_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 2 )), - vuc_pixel_NW_fourth_range, 11 ); 
- vuc_pixel_NW_fourth_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 3 )), - vuc_pixel_NW_fourth_range, 15 ); - - // NORTH EAST - // first range - vector unsigned char vuc_pixel_NE_first_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 0 )), null_vector, 3 ); - vuc_pixel_NE_first_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 1 )), - vuc_pixel_NE_first_range, 7 ); - vuc_pixel_NE_first_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 2 )), - vuc_pixel_NE_first_range, 11 ); - vuc_pixel_NE_first_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 3 )), - vuc_pixel_NE_first_range, 15 ); - // second range - vector unsigned char vuc_pixel_NE_second_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 0 )), null_vector, 3 ); - vuc_pixel_NE_second_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 1 )), - vuc_pixel_NE_second_range, 7 ); - vuc_pixel_NE_second_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 2 )), - vuc_pixel_NE_second_range, 11 ); - vuc_pixel_NE_second_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 3 )), - vuc_pixel_NE_second_range, 15 ); - // third range - vector unsigned char vuc_pixel_NE_third_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 0 )), null_vector, 3 ); - vuc_pixel_NE_third_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 1 )), - vuc_pixel_NE_third_range, 7 ); - vuc_pixel_NE_third_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 2 )), - vuc_pixel_NE_third_range, 11 ); - vuc_pixel_NE_third_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 3 )), - vuc_pixel_NE_third_range, 15 ); - // fourth range 
- vector unsigned char vuc_pixel_NE_fourth_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 0 )), null_vector, 3 ); - vuc_pixel_NE_fourth_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 1 )), - vuc_pixel_NE_fourth_range, 7 ); - vuc_pixel_NE_fourth_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 2 )), - vuc_pixel_NE_fourth_range, 11 ); - vuc_pixel_NE_fourth_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 3 )), - vuc_pixel_NE_fourth_range, 15 ); - - // SOUTH WEST - // first range - vector unsigned char vuc_pixel_SW_first_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 0 )), null_vector, 3 ); - vuc_pixel_SW_first_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 1 )), - vuc_pixel_SW_first_range, 7 ); - vuc_pixel_SW_first_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 2 )), - vuc_pixel_SW_first_range, 11 ); - vuc_pixel_SW_first_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 3 )), - vuc_pixel_SW_first_range, 15 ); - // second range - vector unsigned char vuc_pixel_SW_second_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 0 )), null_vector, 3 ); - vuc_pixel_SW_second_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 1 )), - vuc_pixel_SW_second_range, 7 ); - vuc_pixel_SW_second_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 2 )), - vuc_pixel_SW_second_range, 11 ); - vuc_pixel_SW_second_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 3 )), - vuc_pixel_SW_second_range, 15 ); - // third range - vector unsigned char vuc_pixel_SW_third_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 0 )), null_vector, 3 ); - 
vuc_pixel_SW_third_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 1 )), - vuc_pixel_SW_third_range, 7 ); - vuc_pixel_SW_third_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 2 )), - vuc_pixel_SW_third_range, 11 ); - vuc_pixel_SW_third_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 3 )), - vuc_pixel_SW_third_range, 15 ); - // fourth range - vector unsigned char vuc_pixel_SW_fourth_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 0 )), null_vector, 3 ); - vuc_pixel_SW_fourth_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 1 )), - vuc_pixel_SW_fourth_range, 7 ); - vuc_pixel_SW_fourth_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 2 )), - vuc_pixel_SW_fourth_range, 11 ); - vuc_pixel_SW_fourth_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 3 )), - vuc_pixel_SW_fourth_range, 15 ); - - // NORTH EAST - // first range - vector unsigned char vuc_pixel_SE_first_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 0 )), null_vector, 3 ); - vuc_pixel_SE_first_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 1 )), - vuc_pixel_SE_first_range, 7 ); - vuc_pixel_SE_first_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 2 )), - vuc_pixel_SE_first_range, 11 ); - vuc_pixel_SE_first_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 3 )), - vuc_pixel_SE_first_range, 15 ); - // second range - vector unsigned char vuc_pixel_SE_second_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 0 )), null_vector, 3 ); - vuc_pixel_SE_second_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 1 )), - vuc_pixel_SE_second_range, 7 ); - 
vuc_pixel_SE_second_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 2 )), - vuc_pixel_SE_second_range, 11 ); - vuc_pixel_SE_second_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 3 )), - vuc_pixel_SE_second_range, 15 ); - // third range - vector unsigned char vuc_pixel_SE_third_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 0 )), null_vector, 3 ); - vuc_pixel_SE_third_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 1 )), - vuc_pixel_SE_third_range, 7 ); - vuc_pixel_SE_third_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 2 )), - vuc_pixel_SE_third_range, 11 ); - vuc_pixel_SE_third_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 3 )), - vuc_pixel_SE_third_range, 15 ); - // fourth range - vector unsigned char vuc_pixel_SE_fourth_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 0 )), null_vector, 3 ); - vuc_pixel_SE_fourth_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 1 )), - vuc_pixel_SE_fourth_range, 7 ); - vuc_pixel_SE_fourth_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 2 )), - vuc_pixel_SE_fourth_range, 11 ); - vuc_pixel_SE_fourth_range = spu_insert( - *((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 3 )), - vuc_pixel_SE_fourth_range, 15 ); - - - - // convert to float - vector float vf_pixel_NW_first_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_first_range, 0 ); - vector float vf_pixel_NW_second_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_second_range, 0 ); - vector float vf_pixel_NW_third_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_third_range, 0 ); - vector float vf_pixel_NW_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_fourth_range, 0 ); - - vector float 
vf_pixel_NE_first_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_first_range, 0 ); - vector float vf_pixel_NE_second_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_second_range, 0 ); - vector float vf_pixel_NE_third_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_third_range, 0 ); - vector float vf_pixel_NE_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_fourth_range, 0 ); - - vector float vf_pixel_SW_first_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_first_range, 0 ); - vector float vf_pixel_SW_second_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_second_range, 0 ); - vector float vf_pixel_SW_third_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_third_range, 0 ); - vector float vf_pixel_SW_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_fourth_range, 0 ); - - vector float vf_pixel_SE_first_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_first_range, 0 ); - vector float vf_pixel_SE_second_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_second_range, 0 ); - vector float vf_pixel_SE_third_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_third_range, 0 ); - vector float vf_pixel_SE_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_fourth_range, 0 ); - - // first linear interpolation: EWtop - // EWtop = NW + EWweight*(NE-NW) - // - // first range - vector float vf_EWtop_first_range_tmp = spu_sub( vf_pixel_NE_first_range, vf_pixel_NW_first_range ); - vector float vf_EWtop_first_range = spu_madd( vf_EWweight_first_range, - vf_EWtop_first_range_tmp, - vf_pixel_NW_first_range ); - - // second range - vector float vf_EWtop_second_range_tmp = spu_sub( vf_pixel_NE_second_range, vf_pixel_NW_second_range ); - vector float vf_EWtop_second_range = spu_madd( vf_EWweight_second_range, - vf_EWtop_second_range_tmp, - vf_pixel_NW_second_range ); - - // third range - vector float vf_EWtop_third_range_tmp = spu_sub( vf_pixel_NE_third_range, vf_pixel_NW_third_range ); - vector 
float vf_EWtop_third_range = spu_madd( vf_EWweight_third_range, - vf_EWtop_third_range_tmp, - vf_pixel_NW_third_range ); - - // fourth range - vector float vf_EWtop_fourth_range_tmp = spu_sub( vf_pixel_NE_fourth_range, vf_pixel_NW_fourth_range ); - vector float vf_EWtop_fourth_range = spu_madd( vf_EWweight_fourth_range, - vf_EWtop_fourth_range_tmp, - vf_pixel_NW_fourth_range ); - - - - // second linear interpolation: EWbottom - // EWbottom = SW + EWweight*(SE-SW) - // - // first range - vector float vf_EWbottom_first_range_tmp = spu_sub( vf_pixel_SE_first_range, vf_pixel_SW_first_range ); - vector float vf_EWbottom_first_range = spu_madd( vf_EWweight_first_range, - vf_EWbottom_first_range_tmp, - vf_pixel_SW_first_range ); - - // second range - vector float vf_EWbottom_second_range_tmp = spu_sub( vf_pixel_SE_second_range, vf_pixel_SW_second_range ); - vector float vf_EWbottom_second_range = spu_madd( vf_EWweight_second_range, - vf_EWbottom_second_range_tmp, - vf_pixel_SW_second_range ); - // first range - vector float vf_EWbottom_third_range_tmp = spu_sub( vf_pixel_SE_third_range, vf_pixel_SW_third_range ); - vector float vf_EWbottom_third_range = spu_madd( vf_EWweight_third_range, - vf_EWbottom_third_range_tmp, - vf_pixel_SW_third_range ); - - // first range - vector float vf_EWbottom_fourth_range_tmp = spu_sub( vf_pixel_SE_fourth_range, vf_pixel_SW_fourth_range ); - vector float vf_EWbottom_fourth_range = spu_madd( vf_EWweight_fourth_range, - vf_EWbottom_fourth_range_tmp, - vf_pixel_SW_fourth_range ); - - - - // third linear interpolation: the bilinear interpolated value - // result = EWtop + NSweight*(EWbottom-EWtop); - // - // first range - vector float vf_result_first_range_tmp = spu_sub( vf_EWbottom_first_range, vf_EWtop_first_range ); - vector float vf_result_first_range = spu_madd( vf_NSweight, - vf_result_first_range_tmp, - vf_EWtop_first_range ); - - // second range - vector float vf_result_second_range_tmp = spu_sub( vf_EWbottom_second_range, 
vf_EWtop_second_range ); - vector float vf_result_second_range = spu_madd( vf_NSweight, - vf_result_second_range_tmp, - vf_EWtop_second_range ); - - // third range - vector float vf_result_third_range_tmp = spu_sub( vf_EWbottom_third_range, vf_EWtop_third_range ); - vector float vf_result_third_range = spu_madd( vf_NSweight, - vf_result_third_range_tmp, - vf_EWtop_third_range ); - - // fourth range - vector float vf_result_fourth_range_tmp = spu_sub( vf_EWbottom_fourth_range, vf_EWtop_fourth_range ); - vector float vf_result_fourth_range = spu_madd( vf_NSweight, - vf_result_fourth_range_tmp, - vf_EWtop_fourth_range ); - - - - // convert back: using saturated arithmetic - vector unsigned int vui_result_first_range = vfloat_to_vuint( vf_result_first_range ); - vector unsigned int vui_result_second_range = vfloat_to_vuint( vf_result_second_range ); - vector unsigned int vui_result_third_range = vfloat_to_vuint( vf_result_third_range ); - vector unsigned int vui_result_fourth_range = vfloat_to_vuint( vf_result_fourth_range ); - - // merge results->lower,upper - vector unsigned char vuc_mask_merge_result_first_second = { 0x03, 0x07, 0x0B, 0x0F, - 0x13, 0x17, 0x1B, 0x1F, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00 }; - - vector unsigned char vuc_mask_merge_result_third_fourth = { 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x03, 0x07, 0x0B, 0x0F, - 0x13, 0x17, 0x1B, 0x1F }; - - vector unsigned char vuc_result_first_second = - spu_shuffle( (vector unsigned char) vui_result_first_range, - (vector unsigned char) vui_result_second_range, - vuc_mask_merge_result_first_second ); - - vector unsigned char vuc_result_third_fourth = - spu_shuffle( (vector unsigned char) vui_result_third_range, - (vector unsigned char) vui_result_fourth_range, - vuc_mask_merge_result_third_fourth ); - - // store result - *((vector unsigned char*)dst) = spu_or( vuc_result_first_second, - vuc_result_third_fourth ); - dst += 16; - } -} - -- cgit v1.2.3