summary | refs | log | tree | commit | diff
path: root/apps
diff options
context:
space:
mode:
author Jens Arnold <amiconn@rockbox.org> 2006-02-19 12:43:50 +0000
committer Jens Arnold <amiconn@rockbox.org> 2006-02-19 12:43:50 +0000
commit 2ea75fdbec5a5df403e8fd18a7cb0b16b7cf69c5 (patch)
tree 586678d390eb0b7bf2b9cb9584476ce85e7d94bf /apps
parent da5cef63301a2b13bbbcd36bf409a921fbb5402d (diff)
download rockbox-2ea75fdbec5a5df403e8fd18a7cb0b16b7cf69c5.tar.gz
rockbox-2ea75fdbec5a5df403e8fd18a7cb0b16b7cf69c5.zip
Replace the range_limit lookup table with an inline function, asm optimised for SH1 and coldfire. Slight speedup on SH1, up to 15% speedup on coldfire. Saves almost 1KB.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@8736 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps')
-rw-r--r-- apps/plugins/jpeg.c 183
1 files changed, 71 insertions, 112 deletions
diff --git a/apps/plugins/jpeg.c b/apps/plugins/jpeg.c
index 43c1ce111a..d08765db33 100644
--- a/apps/plugins/jpeg.c
+++ b/apps/plugins/jpeg.c
@@ -143,78 +143,47 @@ static struct plugin_api* rb;
143 143
144/**************** begin JPEG code ********************/ 144/**************** begin JPEG code ********************/
145 145
146/* LUT for IDCT, this could also be used for gamma correction */ 146INLINE unsigned range_limit(int value)
147const unsigned char range_limit[1024] =
148{ 147{
149 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, 148#if CONFIG_CPU == SH7034
150 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, 149 unsigned tmp;
151 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, 150 asm ( /* Note: Uses knowledge that only the low byte of the result is used */
152 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, 151 "mov #-128,%[t] \n"
153 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, 152 "sub %[t],%[v] \n" /* value -= -128; equals value += 128; */
154 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223, 153 "extu.b %[v],%[t] \n"
155 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239, 154 "cmp/eq %[v],%[t] \n" /* low byte == whole number ? */
156 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255, 155 "bt 1f \n" /* yes: no overflow */
157 156 "cmp/pz %[v] \n" /* overflow: positive? */
158 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 157 "subc %[v],%[v] \n" /* %[r] now either 0 or 0xffffffff */
159 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 158 "1: \n"
160 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 159 : /* outputs */
161 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 160 [v]"+r"(value),
162 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 161 [t]"=&r"(tmp)
163 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 162 );
164 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 163 return value;
165 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 164#elif defined(CPU_COLDFIRE)
166 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 165 asm ( /* Note: Uses knowledge that only the low byte of the result is used */
167 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 166 "add.l #128,%[v] \n" /* value += 128; */
168 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 167 "cmp.l #255,%[v] \n" /* overflow? */
169 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 168 "bls.b 1f \n" /* no: return value */
170 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 169 "spl.b %[v] \n" /* yes: set low byte to appropriate boundary */
171 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 170 "1: \n"
172 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 171 : /* outputs */
173 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 172 [v]"+r"(value)
174 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 173 );
175 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 174 return value;
176 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 175#else
177 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 176 value += 128;
178 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
179 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
180 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
181 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
182
183 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
184 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
185 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
186 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
187 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
188 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
189 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
190 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
191 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
192 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
193 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
194 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
195 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
196 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
197 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
198 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
199 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
200 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
201 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
202 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
203 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
204 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
205 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
206 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
207
208 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
209 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
210 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
211 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
212 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
213 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
214 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
215 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127
216};
217 177
178 if ((unsigned)value <= 255)
179 return value;
180
181 if (value < 0)
182 return 0;
183
184 return 255;
185#endif
186}
218 187
219/* IDCT implementation */ 188/* IDCT implementation */
220 189
@@ -266,8 +235,6 @@ const unsigned char range_limit[1024] =
266*/ 235*/
267#define DESCALE(x,n) (((x) + (1l << ((n)-1))) >> (n)) 236#define DESCALE(x,n) (((x) + (1l << ((n)-1))) >> (n))
268 237
269#define RANGE_MASK (255 * 4 + 3) /* 2 bits wider than legal samples */
270
271 238
272 239
273/* 240/*
@@ -277,7 +244,7 @@ const unsigned char range_limit[1024] =
277void idct1x1(unsigned char* p_byte, int* inptr, int* quantptr, int skip_line) 244void idct1x1(unsigned char* p_byte, int* inptr, int* quantptr, int skip_line)
278{ 245{
279 (void)skip_line; /* unused */ 246 (void)skip_line; /* unused */
280 *p_byte = range_limit[(inptr[0] * quantptr[0] >> 3) & RANGE_MASK]; 247 *p_byte = range_limit(inptr[0] * quantptr[0] >> 3);
281} 248}
282 249
283 250
@@ -312,18 +279,14 @@ void idct2x2(unsigned char* p_byte, int* inptr, int* quantptr, int skip_line)
312 /* Row 0 */ 279 /* Row 0 */
313 outptr = p_byte; 280 outptr = p_byte;
314 281
315 outptr[0] = range_limit[(int) DESCALE(tmp0 + tmp1, 3) 282 outptr[0] = range_limit((int) DESCALE(tmp0 + tmp1, 3));
316 & RANGE_MASK]; 283 outptr[1] = range_limit((int) DESCALE(tmp0 - tmp1, 3));
317 outptr[1] = range_limit[(int) DESCALE(tmp0 - tmp1, 3)
318 & RANGE_MASK];
319 284
320 /* Row 1 */ 285 /* Row 1 */
321 outptr = p_byte + skip_line; 286 outptr = p_byte + skip_line;
322 287
323 outptr[0] = range_limit[(int) DESCALE(tmp2 + tmp3, 3) 288 outptr[0] = range_limit((int) DESCALE(tmp2 + tmp3, 3));
324 & RANGE_MASK]; 289 outptr[1] = range_limit((int) DESCALE(tmp2 - tmp3, 3));
325 outptr[1] = range_limit[(int) DESCALE(tmp2 - tmp3, 3)
326 & RANGE_MASK];
327} 290}
328 291
329 292
@@ -398,18 +361,14 @@ void idct4x4(unsigned char* p_byte, int* inptr, int* quantptr, int skip_line)
398 361
399 /* Final output stage */ 362 /* Final output stage */
400 363
401 outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp2, 364 outptr[0] = range_limit((int) DESCALE(tmp10 + tmp2,
402 CONST_BITS+PASS1_BITS+3) 365 CONST_BITS+PASS1_BITS+3));
403 & RANGE_MASK]; 366 outptr[3] = range_limit((int) DESCALE(tmp10 - tmp2,
404 outptr[3] = range_limit[(int) DESCALE(tmp10 - tmp2, 367 CONST_BITS+PASS1_BITS+3));
405 CONST_BITS+PASS1_BITS+3) 368 outptr[1] = range_limit((int) DESCALE(tmp12 + tmp0,
406 & RANGE_MASK]; 369 CONST_BITS+PASS1_BITS+3));
407 outptr[1] = range_limit[(int) DESCALE(tmp12 + tmp0, 370 outptr[2] = range_limit((int) DESCALE(tmp12 - tmp0,
408 CONST_BITS+PASS1_BITS+3) 371 CONST_BITS+PASS1_BITS+3));
409 & RANGE_MASK];
410 outptr[2] = range_limit[(int) DESCALE(tmp12 - tmp0,
411 CONST_BITS+PASS1_BITS+3)
412 & RANGE_MASK];
413 372
414 wsptr += 4; /* advance pointer to next row */ 373 wsptr += 4; /* advance pointer to next row */
415 } 374 }
@@ -549,8 +508,8 @@ void idct8x8(unsigned char* p_byte, int* inptr, int* quantptr, int skip_line)
549 | wsptr[4] | wsptr[5] | wsptr[6] | wsptr[7]) == 0) 508 | wsptr[4] | wsptr[5] | wsptr[6] | wsptr[7]) == 0)
550 { 509 {
551 /* AC terms all zero */ 510 /* AC terms all zero */
552 unsigned char dcval = range_limit[(int) DESCALE((long) wsptr[0], 511 unsigned char dcval = range_limit((int) DESCALE((long) wsptr[0],
553 PASS1_BITS+3) & RANGE_MASK]; 512 PASS1_BITS+3));
554 513
555 outptr[0] = dcval; 514 outptr[0] = dcval;
556 outptr[1] = dcval; 515 outptr[1] = dcval;
@@ -617,22 +576,22 @@ void idct8x8(unsigned char* p_byte, int* inptr, int* quantptr, int skip_line)
617 576
618 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ 577 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
619 578
620 outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp3, 579 outptr[0] = range_limit((int) DESCALE(tmp10 + tmp3,
621 CONST_BITS+PASS1_BITS+3) & RANGE_MASK]; 580 CONST_BITS+PASS1_BITS+3));
622 outptr[7] = range_limit[(int) DESCALE(tmp10 - tmp3, 581 outptr[7] = range_limit((int) DESCALE(tmp10 - tmp3,
623 CONST_BITS+PASS1_BITS+3) & RANGE_MASK]; 582 CONST_BITS+PASS1_BITS+3));
624 outptr[1] = range_limit[(int) DESCALE(tmp11 + tmp2, 583 outptr[1] = range_limit((int) DESCALE(tmp11 + tmp2,
625 CONST_BITS+PASS1_BITS+3) & RANGE_MASK]; 584 CONST_BITS+PASS1_BITS+3));
626 outptr[6] = range_limit[(int) DESCALE(tmp11 - tmp2, 585 outptr[6] = range_limit((int) DESCALE(tmp11 - tmp2,
627 CONST_BITS+PASS1_BITS+3) & RANGE_MASK]; 586 CONST_BITS+PASS1_BITS+3));
628 outptr[2] = range_limit[(int) DESCALE(tmp12 + tmp1, 587 outptr[2] = range_limit((int) DESCALE(tmp12 + tmp1,
629 CONST_BITS+PASS1_BITS+3) & RANGE_MASK]; 588 CONST_BITS+PASS1_BITS+3));
630 outptr[5] = range_limit[(int) DESCALE(tmp12 - tmp1, 589 outptr[5] = range_limit((int) DESCALE(tmp12 - tmp1,
631 CONST_BITS+PASS1_BITS+3) & RANGE_MASK]; 590 CONST_BITS+PASS1_BITS+3));
632 outptr[3] = range_limit[(int) DESCALE(tmp13 + tmp0, 591 outptr[3] = range_limit((int) DESCALE(tmp13 + tmp0,
633 CONST_BITS+PASS1_BITS+3) & RANGE_MASK]; 592 CONST_BITS+PASS1_BITS+3));
634 outptr[4] = range_limit[(int) DESCALE(tmp13 - tmp0, 593 outptr[4] = range_limit((int) DESCALE(tmp13 - tmp0,
635 CONST_BITS+PASS1_BITS+3) & RANGE_MASK]; 594 CONST_BITS+PASS1_BITS+3));
636 595
637 wsptr += 8; /* advance pointer to next row */ 596 wsptr += 8; /* advance pointer to next row */
638 } 597 }
@@ -2273,7 +2232,7 @@ int jpegmem(struct jpeg *p_jpg, int ds)
2273{ 2232{
2274 int size; 2233 int size;
2275 2234
2276 size = (p_jpg->x_phys/ds/p_jpg->subsample_x[0]) 2235 size = (p_jpg->x_phys/ds/p_jpg->subsample_x[0])
2277 * (p_jpg->y_phys/ds/p_jpg->subsample_y[0]); 2236 * (p_jpg->y_phys/ds/p_jpg->subsample_y[0]);
2278#ifdef HAVE_LCD_COLOR 2237#ifdef HAVE_LCD_COLOR
2279 if (p_jpg->blocks > 1) /* colour, add requirements for chroma */ 2238 if (p_jpg->blocks > 1) /* colour, add requirements for chroma */