From 235305e6ac077ffd23b6e2c1f4fd7fb47f6a64b8 Mon Sep 17 00:00:00 2001 From: Andrew Mahone Date: Thu, 11 Jun 2009 23:48:30 +0000 Subject: Don't compensate for lack of shift in second IDCT stage, allowing quantization table to be reduced to 16-bit. Reduce IDCT workspace to 16-bit. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@21254 a1c6a512-1295-4272-9138-f99709370657 --- apps/recorder/jpeg_load.c | 84 +++++++++++++++++++++++------------------------ 1 file changed, 41 insertions(+), 43 deletions(-) (limited to 'apps') diff --git a/apps/recorder/jpeg_load.c b/apps/recorder/jpeg_load.c index 754cf41ab1..d1f47d7a33 100644 --- a/apps/recorder/jpeg_load.c +++ b/apps/recorder/jpeg_load.c @@ -96,7 +96,7 @@ struct jpeg #endif jpeg_pix_t *img_buf; - int quanttable[4][QUANT_TABLE_LENGTH]; /* raw quantization tables 0-3 */ + int16_t quanttable[4][QUANT_TABLE_LENGTH];/* raw quantization tables 0-3 */ struct huffman_table hufftable[2]; /* Huffman tables */ struct derived_tbl dc_derived_tbls[2]; /* Huffman-LUTs */ @@ -206,10 +206,15 @@ INLINE unsigned range_limit(int value) * 16x16->32 bit multiply can be used instead of a full 32x32 multiply. * For 12-bit samples, a full 32-bit multiplication will be needed. */ -#define MULTIPLY16(var,const) (((short) (var)) * ((short) (const))) - #define MULTIPLY(var1, var2) ((var1) * (var2)) +#if defined(CPU_SH) || defined(CPU_COLDFIRE) || \ + (defined(CPU_ARM) && ARM_ARCH > 4) +#define MULTIPLY16(var,const) (((short) (var)) * ((short) (const))) +#else +#define MULTIPLY16 MULTIPLY +#endif + /* * Macros for handling fixed-point arithmetic; these are used by many * but not all of the DCT/IDCT modules. @@ -255,19 +260,19 @@ INLINE unsigned range_limit(int value) #define COMPONENT_SHIFT 15 /* horizontal-pass 1-point IDCT */ -static void idct1h(int *ws, unsigned char *out, int rows, int rowstep) +static void idct1h(int16_t *ws, unsigned char *out, int rows, int rowstep) { int row; for (row = 0; row < rows; row++) { - *out = range_limit((int) DESCALE(*ws, DS_OUT)); + *out = range_limit((int) DESCALE(*ws, 3 + PASS1_BITS)); out += rowstep; ws += 8; } } /* vertical-pass 2-point IDCT */ -static void idct2v(int *ws, int cols) +static void idct2v(int16_t *ws, int cols) { int col; for (col = 0; col < cols; col++) @@ -281,30 +286,30 @@ static void idct2v(int *ws, int cols) } /* horizontal-pass 2-point IDCT */ -static void idct2h(int *ws, unsigned char *out, int rows, int rowstep) +static void idct2h(int16_t *ws, unsigned char *out, int rows, int rowstep) { int row; for (row = 0; row < rows; row++) { - int tmp1 = ws[0] + (ONE << (DS_OUT - 1)); + int tmp1 = ws[0] + (ONE << (PASS1_BITS + 2)); int tmp2 = ws[1]; out[JPEG_PIX_SZ*0] = range_limit((int) RIGHT_SHIFT(tmp1 + tmp2, - DS_OUT)); + PASS1_BITS + 3)); out[JPEG_PIX_SZ*1] = range_limit((int) RIGHT_SHIFT(tmp1 - tmp2, - DS_OUT)); + PASS1_BITS + 3)); out += rowstep; ws += 8; } } /* vertical-pass 4-point IDCT */ -static void idct4v(int *ws, int cols) +static void idct4v(int16_t *ws, int cols) { - int tmp0, tmp2, tmp10, tmp12; - int z1, z2, z3; int col; for (col = 0; col < cols; col++, ws++) { + int tmp0, tmp2, tmp10, tmp12; + int z1, z2, z3; /* Even part */ tmp0 = ws[8*0]; @@ -336,7 +341,7 @@ static void idct4v(int *ws, int cols) } /* horizontal-pass 4-point IDCT */ -static void idct4h(int *ws, unsigned char *out, int rows, int rowstep) +static void idct4h(int16_t *ws, unsigned char *out, int rows, int rowstep) { int tmp0, tmp2, tmp10, tmp12; int z1, z2, z3; @@ -375,7 +380,7 @@ static void idct4h(int *ws, unsigned char *out, int rows, int rowstep) } /* vertical-pass 8-point IDCT */ -static void idct8v(int *ws, int cols) +static void idct8v(int16_t *ws, int cols) { long tmp0, tmp1, tmp2, tmp3; long tmp10, tmp11, tmp12, tmp13; @@ -469,7 +474,7 @@ static void idct8v(int *ws, int cols) } /* horizontal-pass 8-point IDCT */ -static void idct8h(int *ws, unsigned char *out, int rows, int rowstep) +static void idct8h(int16_t *ws, unsigned char *out, int rows, int rowstep) { long tmp0, tmp1, tmp2, tmp3; long tmp10, tmp11, tmp12, tmp13; @@ -580,7 +585,7 @@ static void idct8h(int *ws, unsigned char *out, int rows, int rowstep) #ifdef HAVE_LCD_COLOR /* vertical-pass 16-point IDCT */ -static void idct16v(int *ws, int cols) +static void idct16v(int16_t *ws, int cols) { long tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13; long tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; @@ -687,7 +692,7 @@ static void idct16v(int *ws, int cols) } /* horizontal-pass 16-point IDCT */ -static void idct16h(int *ws, unsigned char *out, int rows, int rowstep) +static void idct16h(int16_t *ws, unsigned char *out, int rows, int rowstep) { long tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13; long tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; @@ -812,19 +817,18 @@ static void idct16h(int *ws, unsigned char *out, int rows, int rowstep) #endif struct idct_entry { - int v_scale; - int h_scale; - void (*v_idct)(int *ws, int cols); - void (*h_idct)(int *ws, unsigned char *out, int rows, int rowstep); + int scale; + void (*v_idct)(int16_t *ws, int cols); + void (*h_idct)(int16_t *ws, unsigned char *out, int rows, int rowstep); }; struct idct_entry idct_tbl[] = { - { PASS1_BITS, CONST_BITS, NULL, idct1h }, - { PASS1_BITS, CONST_BITS, idct2v, idct2h }, - { 0, 0, idct4v, idct4h }, - { 0, 0, idct8v, idct8h }, + { PASS1_BITS, NULL, idct1h }, + { PASS1_BITS, idct2v, idct2h }, + { 0, idct4v, idct4h }, + { 0, idct8v, idct8h }, #ifdef HAVE_LCD_COLOR - { 0, 0, idct16v, idct16h }, + { 0, idct16v, idct16h }, #endif }; @@ -1506,20 +1510,14 @@ INLINE void fix_huff_tables(struct jpeg *p_jpeg) */ INLINE void fix_quant_tables(struct jpeg *p_jpeg) { - int shift, i, x, y, a; + int shift, i, j; for (i = 0; i < 2; i++) { - shift = idct_tbl[p_jpeg->v_scale[i]].v_scale + - idct_tbl[p_jpeg->h_scale[i]].h_scale; + shift = idct_tbl[p_jpeg->v_scale[i]].scale; if (shift) { - a = 0; - for (y = 0; y < (int)BIT_N(p_jpeg->h_scale[i]); y++) - { - for (x = 0; x < (int)BIT_N(p_jpeg->v_scale[i]); x++) - p_jpeg->quanttable[i][zig[a+x]] <<= shift; - a += 8; - } + for (j = 0; j < 64; j++) + p_jpeg->quanttable[i][j] <<= shift; } } } @@ -1780,7 +1778,7 @@ static struct img_part *store_row_jpeg(void *jpeg_args) store_offs[p_jpeg->store_pos[2]] = b_width << p_jpeg->v_scale[0]; store_offs[p_jpeg->store_pos[3]] = store_offs[1] + store_offs[2]; - int block[128]; /* decoded DCT coefficients */ + int16_t block[128]; /* decoded DCT coefficients */ for (x = 0; x < p_jpeg->x_mbl; x++) { int blkn; @@ -1804,13 +1802,13 @@ static struct img_part *store_row_jpeg(void *jpeg_args) #ifdef HAVE_LCD_COLOR p_jpeg->last_dc_val[ci] += s; /* output it (assumes zag[0] = 0) */ - block[0] = p_jpeg->last_dc_val[ci] * - p_jpeg->quanttable[!!ci][0]; + block[0] = MULTIPLY16(p_jpeg->last_dc_val[ci], + p_jpeg->quanttable[!!ci][0]); #else p_jpeg->last_dc_val += s; /* output it (assumes zag[0] = 0) */ - block[0] = p_jpeg->last_dc_val * - p_jpeg->quanttable[0][0]; + block[0] = MULTIPLY16(p_jpeg->last_dc_val, + p_jpeg->quanttable[0][0]); #endif /* coefficient buffer must be cleared */ MEMSET(block+1, 0, p_jpeg->zero_need[!!ci] * sizeof(int)); @@ -1830,7 +1828,7 @@ static struct img_part *store_row_jpeg(void *jpeg_args) if (a <= zag[p_jpeg->k_need[!!ci]] && (a & 7) <= (zag[p_jpeg->k_need[!!ci]] & 7)) { - r *= p_jpeg->quanttable[!!ci][k]; + r = MULTIPLY16(r, p_jpeg->quanttable[!!ci][k]); block[zag[k]] = r ; } } -- cgit v1.2.3