diff options
author | Andrew Mahone <andrew.mahone@gmail.com> | 2009-06-11 23:48:37 +0000 |
---|---|---|
committer | Andrew Mahone <andrew.mahone@gmail.com> | 2009-06-11 23:48:37 +0000 |
commit | 57ccbdd198b7b59deeb78f4910eff1133dd9698f (patch) | |
tree | 019222857d68802680d3242f89ad5216cefba37d /apps | |
parent | 235305e6ac077ffd23b6e2c1f4fd7fb47f6a64b8 (diff) | |
download | rockbox-57ccbdd198b7b59deeb78f4910eff1133dd9698f.tar.gz rockbox-57ccbdd198b7b59deeb78f4910eff1133dd9698f.zip |
Move the +128 offset out of range_limit() and add it to the DC coefficient in each idct*h routine instead, for a small binary-size improvement in idct4h, idct8h, and idct16h, and a negligible speed increase.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@21255 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps')
-rw-r--r-- | apps/recorder/jpeg_load.c | 22 |
1 files changed, 10 insertions, 12 deletions
diff --git a/apps/recorder/jpeg_load.c b/apps/recorder/jpeg_load.c index d1f47d7a33..de5c95eab7 100644 --- a/apps/recorder/jpeg_load.c +++ b/apps/recorder/jpeg_load.c | |||
@@ -123,8 +123,6 @@ INLINE unsigned range_limit(int value) | |||
123 | #if CONFIG_CPU == SH7034 | 123 | #if CONFIG_CPU == SH7034 |
124 | unsigned tmp; | 124 | unsigned tmp; |
125 | asm ( /* Note: Uses knowledge that only low byte of result is used */ | 125 | asm ( /* Note: Uses knowledge that only low byte of result is used */ |
126 | "mov #-128,%[t] \n" | ||
127 | "sub %[t],%[v] \n" /* value -= -128; equals value += 128; */ | ||
128 | "extu.b %[v],%[t] \n" | 126 | "extu.b %[v],%[t] \n" |
129 | "cmp/eq %[v],%[t] \n" /* low byte == whole number ? */ | 127 | "cmp/eq %[v],%[t] \n" /* low byte == whole number ? */ |
130 | "bt 1f \n" /* yes: no overflow */ | 128 | "bt 1f \n" /* yes: no overflow */ |
@@ -139,7 +137,6 @@ INLINE unsigned range_limit(int value) | |||
139 | #elif defined(CPU_COLDFIRE) | 137 | #elif defined(CPU_COLDFIRE) |
140 | /* Note: Uses knowledge that only the low byte of the result is used */ | 138 | /* Note: Uses knowledge that only the low byte of the result is used */ |
141 | asm ( | 139 | asm ( |
142 | "add.l #128,%[v] \n" /* value += 128; */ | ||
143 | "cmp.l #255,%[v] \n" /* overflow? */ | 140 | "cmp.l #255,%[v] \n" /* overflow? */ |
144 | "bls.b 1f \n" /* no: return value */ | 141 | "bls.b 1f \n" /* no: return value */ |
145 | /* yes: set low byte to appropriate boundary */ | 142 | /* yes: set low byte to appropriate boundary */ |
@@ -152,7 +149,6 @@ INLINE unsigned range_limit(int value) | |||
152 | #elif defined(CPU_ARM) | 149 | #elif defined(CPU_ARM) |
153 | /* Note: Uses knowledge that only the low byte of the result is used */ | 150 | /* Note: Uses knowledge that only the low byte of the result is used */ |
154 | asm ( | 151 | asm ( |
155 | "add %[v], %[v], #128 \n" /* value += 128 */ | ||
156 | "cmp %[v], #255 \n" /* out of range 0..255? */ | 152 | "cmp %[v], #255 \n" /* out of range 0..255? */ |
157 | "mvnhi %[v], %[v], asr #31 \n" /* yes: set all bits to ~(sign_bit) */ | 153 | "mvnhi %[v], %[v], asr #31 \n" /* yes: set all bits to ~(sign_bit) */ |
158 | : /* outputs */ | 154 | : /* outputs */ |
@@ -160,8 +156,6 @@ INLINE unsigned range_limit(int value) | |||
160 | ); | 156 | ); |
161 | return value; | 157 | return value; |
162 | #else | 158 | #else |
163 | value += 128; | ||
164 | |||
165 | if ((unsigned)value <= 255) | 159 | if ((unsigned)value <= 255) |
166 | return value; | 160 | return value; |
167 | 161 | ||
@@ -265,7 +259,7 @@ static void idct1h(int16_t *ws, unsigned char *out, int rows, int rowstep) | |||
265 | int row; | 259 | int row; |
266 | for (row = 0; row < rows; row++) | 260 | for (row = 0; row < rows; row++) |
267 | { | 261 | { |
268 | *out = range_limit((int) DESCALE(*ws, 3 + PASS1_BITS)); | 262 | *out = range_limit(128 + (int) DESCALE(*ws, 3 + PASS1_BITS)); |
269 | out += rowstep; | 263 | out += rowstep; |
270 | ws += 8; | 264 | ws += 8; |
271 | } | 265 | } |
@@ -291,7 +285,8 @@ static void idct2h(int16_t *ws, unsigned char *out, int rows, int rowstep) | |||
291 | int row; | 285 | int row; |
292 | for (row = 0; row < rows; row++) | 286 | for (row = 0; row < rows; row++) |
293 | { | 287 | { |
294 | int tmp1 = ws[0] + (ONE << (PASS1_BITS + 2)); | 288 | int tmp1 = ws[0] + (ONE << (PASS1_BITS + 2)) |
289 | + (128 << (PASS1_BITS + 3)); | ||
295 | int tmp2 = ws[1]; | 290 | int tmp2 = ws[1]; |
296 | out[JPEG_PIX_SZ*0] = range_limit((int) RIGHT_SHIFT(tmp1 + tmp2, | 291 | out[JPEG_PIX_SZ*0] = range_limit((int) RIGHT_SHIFT(tmp1 + tmp2, |
297 | PASS1_BITS + 3)); | 292 | PASS1_BITS + 3)); |
@@ -350,7 +345,8 @@ static void idct4h(int16_t *ws, unsigned char *out, int rows, int rowstep) | |||
350 | { | 345 | { |
351 | /* Even part */ | 346 | /* Even part */ |
352 | 347 | ||
353 | tmp0 = (int) ws[0] + (ONE << (PASS1_BITS + 2)); | 348 | tmp0 = (int) ws[0] + (ONE << (PASS1_BITS + 2)) |
349 | + (128 << (PASS1_BITS + 3)); | ||
354 | tmp2 = (int) ws[2]; | 350 | tmp2 = (int) ws[2]; |
355 | 351 | ||
356 | tmp10 = (tmp0 + tmp2) << CONST_BITS; | 352 | tmp10 = (tmp0 + tmp2) << CONST_BITS; |
@@ -495,7 +491,7 @@ static void idct8h(int16_t *ws, unsigned char *out, int rows, int rowstep) | |||
495 | | ws[4] | ws[5] | ws[6] | ws[7]) == 0) | 491 | | ws[4] | ws[5] | ws[6] | ws[7]) == 0) |
496 | { | 492 | { |
497 | /* AC terms all zero */ | 493 | /* AC terms all zero */ |
498 | unsigned char dcval = range_limit((int) DESCALE((long) ws[0], | 494 | unsigned char dcval = range_limit(128 + (int) DESCALE((long) ws[0], |
499 | PASS1_BITS+3)); | 495 | PASS1_BITS+3)); |
500 | 496 | ||
501 | out[JPEG_PIX_SZ*0] = dcval; | 497 | out[JPEG_PIX_SZ*0] = dcval; |
@@ -520,7 +516,8 @@ static void idct8h(int16_t *ws, unsigned char *out, int rows, int rowstep) | |||
520 | tmp2 = z1 + MULTIPLY16(z3, - FIX_1_847759065); | 516 | tmp2 = z1 + MULTIPLY16(z3, - FIX_1_847759065); |
521 | tmp3 = z1 + MULTIPLY16(z2, FIX_0_765366865); | 517 | tmp3 = z1 + MULTIPLY16(z2, FIX_0_765366865); |
522 | 518 | ||
523 | z4 = (long) ws[0] + (ONE << (PASS1_BITS + 2)); | 519 | z4 = (long) ws[0] + (ONE << (PASS1_BITS + 2)) |
520 | + (128 << (PASS1_BITS + 3)); | ||
524 | z4 <<= CONST_BITS; | 521 | z4 <<= CONST_BITS; |
525 | z5 = (long) ws[4] << CONST_BITS; | 522 | z5 = (long) ws[4] << CONST_BITS; |
526 | tmp0 = z4 + z5; | 523 | tmp0 = z4 + z5; |
@@ -703,7 +700,8 @@ static void idct16h(int16_t *ws, unsigned char *out, int rows, int rowstep) | |||
703 | /* Even part */ | 700 | /* Even part */ |
704 | 701 | ||
705 | /* Add fudge factor here for final descale. */ | 702 | /* Add fudge factor here for final descale. */ |
706 | tmp0 = (long) ws[0] + (ONE << (PASS1_BITS+2)); | 703 | tmp0 = (long) ws[0] + (ONE << (PASS1_BITS+2)) |
704 | + (128 << (PASS1_BITS + 3)); | ||
707 | tmp0 <<= CONST_BITS; | 705 | tmp0 <<= CONST_BITS; |
708 | 706 | ||
709 | z1 = (long) ws[4]; | 707 | z1 = (long) ws[4]; |