summary refs log tree commit diff
diff options
context:
space:
mode:
authorAndrew Mahone <andrew.mahone@gmail.com>2009-05-26 20:00:47 +0000
committerAndrew Mahone <andrew.mahone@gmail.com>2009-05-26 20:00:47 +0000
commit92785b8f2f20b0fc16de7e771e5eb55fd8497ff8 (patch)
tree3af2399c1e1be8e56cb1b5e6787efd738dad6d52
parentc4ed88f59302882749023268ac456c415a4b1243 (diff)
downloadrockbox-92785b8f2f20b0fc16de7e771e5eb55fd8497ff8.tar.gz
rockbox-92785b8f2f20b0fc16de7e771e5eb55fd8497ff8.zip
Use pre-multiplication in scaler to save one multiply per color component on ARM and Coldfire, at the cost of an extra add/shift in the horizontal scaler to reduce values to a workable range. SH-1 retains the same basic math, as
the use of 16x16->32 hardware multiplication in the earlier scaler stages saves more than removing the 32x32->40 multiply to descale output. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@21091 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--apps/plugins/bench_scaler.c4
-rw-r--r--apps/plugins/lib/grey_draw.c2
-rw-r--r--apps/plugins/pictureflow/pictureflow.c49
-rw-r--r--apps/recorder/resize.c409
-rw-r--r--apps/recorder/resize.h100
5 files changed, 354 insertions, 210 deletions
diff --git a/apps/plugins/bench_scaler.c b/apps/plugins/bench_scaler.c
index c24807dad6..246271dcc8 100644
--- a/apps/plugins/bench_scaler.c
+++ b/apps/plugins/bench_scaler.c
@@ -49,8 +49,8 @@ static void output_row_null(uint32_t row, void * row_in,
49#else 49#else
50 uint32_t *lim = in + ctx->bm->width; 50 uint32_t *lim = in + ctx->bm->width;
51#endif 51#endif
52 for (; in < lim; in++) 52 while (in < lim)
53 output = SC_MUL(*in + ctx->round, ctx->divisor); 53 output = SC_OUT(*in++, ctx);
54 return; 54 return;
55} 55}
56 56
diff --git a/apps/plugins/lib/grey_draw.c b/apps/plugins/lib/grey_draw.c
index 6315ad9b1a..c1e6376cfe 100644
--- a/apps/plugins/lib/grey_draw.c
+++ b/apps/plugins/lib/grey_draw.c
@@ -733,7 +733,7 @@ static void output_row_grey_32(uint32_t row, void * row_in,
733 uint32_t *qp = (uint32_t*)row_in; 733 uint32_t *qp = (uint32_t*)row_in;
734 uint8_t *dest = (uint8_t*)ctx->bm->data + ctx->bm->width * row; 734 uint8_t *dest = (uint8_t*)ctx->bm->data + ctx->bm->width * row;
735 for (col = 0; col < ctx->bm->width; col++) 735 for (col = 0; col < ctx->bm->width; col++)
736 *dest++ = SC_MUL((*qp++) + ctx->round,ctx->divisor); 736 *dest++ = SC_OUT(*qp++, ctx);
737} 737}
738 738
739static unsigned int get_size_grey(struct bitmap *bm) 739static unsigned int get_size_grey(struct bitmap *bm)
diff --git a/apps/plugins/pictureflow/pictureflow.c b/apps/plugins/pictureflow/pictureflow.c
index a1ad3d2776..bbe2541681 100644
--- a/apps/plugins/pictureflow/pictureflow.c
+++ b/apps/plugins/pictureflow/pictureflow.c
@@ -592,25 +592,12 @@ static inline PFreal fcos(int iangle)
592 return fsin(iangle + (IANGLE_MAX >> 2)); 592 return fsin(iangle + (IANGLE_MAX >> 2));
593} 593}
594 594
595static inline uint32_t div255(uint32_t val) 595static inline unsigned scale_val(unsigned val, unsigned bits)
596{ 596{
597 return ((((val >> 8) + val) >> 8) + val) >> 8; 597 val = val * ((1 << bits) - 1);
598 return ((val >> 8) + val + 128) >> 8;
598} 599}
599 600
600#define SCALE_VAL(val,out) div255((val) * (out) + 127)
601#define SCALE_VAL32(val, out) \
602({ \
603 uint32_t val__ = (val) * (out); \
604 val__ = ((((val__ >> 8) + val__) >> 8) + val__ + 128) >> 8; \
605 val__; \
606})
607#define SCALE_VAL8(val, out) \
608({ \
609 unsigned val__ = (val) * (out); \
610 val__ = ((val__ >> 8) + val__ + 128) >> 8; \
611 val__; \
612})
613
614static void output_row_8_transposed(uint32_t row, void * row_in, 601static void output_row_8_transposed(uint32_t row, void * row_in,
615 struct scaler_context *ctx) 602 struct scaler_context *ctx)
616{ 603{
@@ -625,9 +612,9 @@ static void output_row_8_transposed(uint32_t row, void * row_in,
625 unsigned r, g, b; 612 unsigned r, g, b;
626 for (; dest < end; dest += ctx->bm->height) 613 for (; dest < end; dest += ctx->bm->height)
627 { 614 {
628 r = SCALE_VAL8(qp->red, 31); 615 r = scale_val(qp->red, 5);
629 g = SCALE_VAL8(qp->green, 63); 616 g = scale_val(qp->green, 6);
630 b = SCALE_VAL8((qp++)->blue, 31); 617 b = scale_val((qp++)->blue, 5);
631 *dest = LCD_RGBPACK_LCD(r,g,b); 618 *dest = LCD_RGBPACK_LCD(r,g,b);
632 } 619 }
633#endif 620#endif
@@ -641,19 +628,15 @@ static void output_row_32_transposed(uint32_t row, void * row_in,
641#ifdef USEGSLIB 628#ifdef USEGSLIB
642 uint32_t *qp = (uint32_t*)row_in; 629 uint32_t *qp = (uint32_t*)row_in;
643 for (; dest < end; dest += ctx->bm->height) 630 for (; dest < end; dest += ctx->bm->height)
644 *dest = SC_MUL((*qp++) + ctx->round, ctx->divisor); 631 *dest = SC_OUT(*qp++, ctx);
645#else 632#else
646 struct uint32_rgb *qp = (struct uint32_rgb*)row_in; 633 struct uint32_rgb *qp = (struct uint32_rgb*)row_in;
647 uint32_t rb_mul = SCALE_VAL32(ctx->divisor, 31),
648 rb_rnd = SCALE_VAL32(ctx->round, 31),
649 g_mul = SCALE_VAL32(ctx->divisor, 63),
650 g_rnd = SCALE_VAL32(ctx->round, 63);
651 int r, g, b; 634 int r, g, b;
652 for (; dest < end; dest += ctx->bm->height) 635 for (; dest < end; dest += ctx->bm->height)
653 { 636 {
654 r = SC_MUL(qp->r + rb_rnd, rb_mul); 637 r = scale_val(SC_OUT(qp->r, ctx), 5);
655 g = SC_MUL(qp->g + g_rnd, g_mul); 638 g = scale_val(SC_OUT(qp->g, ctx), 6);
656 b = SC_MUL(qp->b + rb_rnd, rb_mul); 639 b = scale_val(SC_OUT(qp->b, ctx), 5);
657 qp++; 640 qp++;
658 *dest = LCD_RGBPACK_LCD(r,g,b); 641 *dest = LCD_RGBPACK_LCD(r,g,b);
659 } 642 }
@@ -670,14 +653,14 @@ static void output_row_32_transposed_fromyuv(uint32_t row, void * row_in,
670 for (; dest < end; dest += ctx->bm->height) 653 for (; dest < end; dest += ctx->bm->height)
671 { 654 {
672 unsigned r, g, b, y, u, v; 655 unsigned r, g, b, y, u, v;
673 y = SC_MUL(qp->b + ctx->round, ctx->divisor); 656 y = SC_OUT(qp->b, ctx);
674 u = SC_MUL(qp->g + ctx->round, ctx->divisor); 657 u = SC_OUT(qp->g, ctx);
675 v = SC_MUL(qp->r + ctx->round, ctx->divisor); 658 v = SC_OUT(qp->r, ctx);
676 qp++; 659 qp++;
677 yuv_to_rgb(y, u, v, &r, &g, &b); 660 yuv_to_rgb(y, u, v, &r, &g, &b);
678 r = (31 * r + (r >> 3) + 127) >> 8; 661 r = scale_val(r, 5);
679 g = (63 * g + (g >> 2) + 127) >> 8; 662 g = scale_val(g, 6);
680 b = (31 * b + (b >> 3) + 127) >> 8; 663 b = scale_val(b, 5);
681 *dest = LCD_RGBPACK_LCD(r, g, b); 664 *dest = LCD_RGBPACK_LCD(r, g, b);
682 } 665 }
683} 666}
diff --git a/apps/recorder/resize.c b/apps/recorder/resize.c
index 1e9210e819..3a0ad8d75b 100644
--- a/apps/recorder/resize.c
+++ b/apps/recorder/resize.c
@@ -131,20 +131,45 @@ int recalc_dimension(struct dim *dst, struct dim *src)
131 return false; \ 131 return false; \
132} 132}
133 133
134/* Set up rounding and scale factors for horizontal area scaler */ 134#if defined(CPU_COLDFIRE)
135static inline void scale_h_area_setup(struct scaler_context *ctx) 135#define MAC(op1, op2, num) \
136 asm volatile( \
137 "mac.l %0, %1, %%acc" #num \
138 : \
139 : "%d" (op1), "d" (op2)\
140 )
141#define MAC_OUT(dest, num) \
142 asm volatile( \
143 "movclr.l %%acc" #num ", %0" \
144 : "=d" (dest) \
145 )
146#elif defined(CPU_SH)
147/* calculate the 32-bit product of unsigned 16-bit op1 and op2 */
148static inline int32_t mul_s16_s16(int16_t op1, int16_t op2)
136{ 149{
137/* sum is output value * src->width */ 150 return (int32_t)(op1 * op2);
138 SDEBUGF("scale_h_area_setup\n");
139 ctx->divisor = ctx->src->width;
140} 151}
141 152
153/* calculate the 32-bit product of signed 16-bit op1 and op2 */
154static inline uint32_t mul_u16_u16(uint16_t op1, uint16_t op2)
155{
156 return (uint32_t)(op1 * op2);
157}
158#endif
159
142/* horizontal area average scaler */ 160/* horizontal area average scaler */
143static bool scale_h_area(void *out_line_ptr, 161static bool scale_h_area(void *out_line_ptr,
144 struct scaler_context *ctx, bool accum) 162 struct scaler_context *ctx, bool accum)
145{ 163{
146 SDEBUGF("scale_h_area\n"); 164 SDEBUGF("scale_h_area\n");
147 unsigned int ix, ox, oxe, mul; 165 unsigned int ix, ox, oxe, mul;
166#if defined(CPU_SH) || defined (TEST_SH_MATH)
167 const uint32_t h_i_val = ctx->src->width,
168 h_o_val = ctx->bm->width;
169#else
170 const uint32_t h_i_val = ctx->h_i_val,
171 h_o_val = ctx->h_o_val;
172#endif
148#ifdef HAVE_LCD_COLOR 173#ifdef HAVE_LCD_COLOR
149 struct uint32_rgb rgbvalacc = { 0, 0, 0 }, 174 struct uint32_rgb rgbvalacc = { 0, 0, 0 },
150 rgbvaltmp = { 0, 0, 0 }, 175 rgbvaltmp = { 0, 0, 0 },
@@ -161,31 +186,57 @@ static bool scale_h_area(void *out_line_ptr,
161 yield(); 186 yield();
162 for (ix = 0; ix < (unsigned int)ctx->src->width; ix++) 187 for (ix = 0; ix < (unsigned int)ctx->src->width; ix++)
163 { 188 {
164 oxe += ctx->bm->width; 189 oxe += h_o_val;
165 /* end of current area has been reached */ 190 /* end of current area has been reached */
166 /* fill buffer if needed */ 191 /* fill buffer if needed */
167 FILL_BUF(part,ctx->store_part,ctx->args); 192 FILL_BUF(part,ctx->store_part,ctx->args);
168#ifdef HAVE_LCD_COLOR 193#ifdef HAVE_LCD_COLOR
169 if (oxe >= (unsigned int)ctx->src->width) 194 if (oxe >= h_i_val)
170 { 195 {
171 /* "reset" error, which now represents partial coverage of next 196 /* "reset" error, which now represents partial coverage of next
172 pixel by the next area 197 pixel by the next area
173 */ 198 */
174 oxe -= ctx->src->width; 199 oxe -= h_i_val;
175 200
201#if defined(CPU_COLDFIRE)
202/* Coldfire EMAC math */
176 /* add saved partial pixel from start of area */ 203 /* add saved partial pixel from start of area */
177 rgbvalacc.r = rgbvalacc.r * ctx->bm->width + rgbvaltmp.r * mul; 204 MAC(rgbvalacc.r, h_o_val, 0);
178 rgbvalacc.g = rgbvalacc.g * ctx->bm->width + rgbvaltmp.g * mul; 205 MAC(rgbvalacc.g, h_o_val, 1);
179 rgbvalacc.b = rgbvalacc.b * ctx->bm->width + rgbvaltmp.b * mul; 206 MAC(rgbvalacc.b, h_o_val, 2);
207 MAC(rgbvaltmp.r, mul, 0);
208 MAC(rgbvaltmp.g, mul, 1);
209 MAC(rgbvaltmp.b, mul, 2);
210 /* get new pixel , then add its partial coverage to this area */
211 mul = h_o_val - oxe;
212 rgbvaltmp.r = part->buf->red;
213 rgbvaltmp.g = part->buf->green;
214 rgbvaltmp.b = part->buf->blue;
215 MAC(rgbvaltmp.r, mul, 0);
216 MAC(rgbvaltmp.g, mul, 1);
217 MAC(rgbvaltmp.b, mul, 2);
218 MAC_OUT(rgbvalacc.r, 0);
219 MAC_OUT(rgbvalacc.g, 1);
220 MAC_OUT(rgbvalacc.b, 2);
221#else
222/* generic C math */
223 /* add saved partial pixel from start of area */
224 rgbvalacc.r = rgbvalacc.r * h_o_val + rgbvaltmp.r * mul;
225 rgbvalacc.g = rgbvalacc.g * h_o_val + rgbvaltmp.g * mul;
226 rgbvalacc.b = rgbvalacc.b * h_o_val + rgbvaltmp.b * mul;
180 227
181 /* get new pixel , then add its partial coverage to this area */ 228 /* get new pixel , then add its partial coverage to this area */
182 rgbvaltmp.r = part->buf->red; 229 rgbvaltmp.r = part->buf->red;
183 rgbvaltmp.g = part->buf->green; 230 rgbvaltmp.g = part->buf->green;
184 rgbvaltmp.b = part->buf->blue; 231 rgbvaltmp.b = part->buf->blue;
185 mul = ctx->bm->width - oxe; 232 mul = h_o_val - oxe;
186 rgbvalacc.r += rgbvaltmp.r * mul; 233 rgbvalacc.r += rgbvaltmp.r * mul;
187 rgbvalacc.g += rgbvaltmp.g * mul; 234 rgbvalacc.g += rgbvaltmp.g * mul;
188 rgbvalacc.b += rgbvaltmp.b * mul; 235 rgbvalacc.b += rgbvaltmp.b * mul;
236#endif /* CPU */
237 rgbvalacc.r = (rgbvalacc.r + (1 << 21)) >> 22;
238 rgbvalacc.g = (rgbvalacc.g + (1 << 21)) >> 22;
239 rgbvalacc.b = (rgbvalacc.b + (1 << 21)) >> 22;
189 /* store or accumulate to output row */ 240 /* store or accumulate to output row */
190 if (accum) 241 if (accum)
191 { 242 {
@@ -200,7 +251,7 @@ static bool scale_h_area(void *out_line_ptr,
200 rgbvalacc.r = 0; 251 rgbvalacc.r = 0;
201 rgbvalacc.g = 0; 252 rgbvalacc.g = 0;
202 rgbvalacc.b = 0; 253 rgbvalacc.b = 0;
203 mul = ctx->bm->width - mul; 254 mul = oxe;
204 ox += 1; 255 ox += 1;
205 /* inside an area */ 256 /* inside an area */
206 } else { 257 } else {
@@ -210,21 +261,45 @@ static bool scale_h_area(void *out_line_ptr,
210 rgbvalacc.b += part->buf->blue; 261 rgbvalacc.b += part->buf->blue;
211 } 262 }
212#else 263#else
213 if (oxe >= (unsigned int)ctx->src->width) 264 if (oxe >= h_i_val)
214 { 265 {
215 /* "reset" error, which now represents partial coverage of next 266 /* "reset" error, which now represents partial coverage of next
216 pixel by the next area 267 pixel by the next area
217 */ 268 */
218 oxe -= ctx->src->width; 269 oxe -= h_i_val;
270#if defined(CPU_COLDFIRE)
271/* Coldfire EMAC math */
272 /* add saved partial pixel from start of area */
273 MAC(acc, h_o_val, 0);
274 MAC(tmp, mul, 0);
275 /* get new pixel , then add its partial coverage to this area */
276 tmp = *(part->buf);
277 mul = h_o_val - oxe;
278 MAC(tmp, mul, 0);
279 MAC_OUT(acc, 0);
280#elif defined(CPU_SH)
281/* SH-1 16x16->32 math */
282 /* add saved partial pixel from start of area */
283 acc = mul_u16_u16(acc, h_o_val) + mul_u16_u16(tmp, mul);
219 284
285 /* get new pixel , then add its partial coverage to this area */
286 tmp = *(part->buf);
287 mul = h_o_val - oxe;
288 acc += mul_u16_u16(tmp, mul);
289#else
290/* generic C math */
220 /* add saved partial pixel from start of area */ 291 /* add saved partial pixel from start of area */
221 acc = MULUQ(acc, ctx->bm->width) + MULUQ(tmp, mul); 292 acc = (acc * h_o_val) + (tmp * mul);
222 293
223 /* get new pixel , then add its partial coverage to this area */ 294 /* get new pixel , then add its partial coverage to this area */
224 tmp = *(part->buf); 295 tmp = *(part->buf);
225 mul = ctx->bm->width - oxe; 296 mul = h_o_val - oxe;
226 acc += MULUQ(tmp, mul); 297 acc += tmp * mul;
298#endif /* CPU */
299#if !(defined(CPU_SH) || defined(TEST_SH_MATH))
227 /* round, divide, and either store or accumulate to output row */ 300 /* round, divide, and either store or accumulate to output row */
301 acc = (acc + (1 << 21)) >> 22;
302#endif
228 if (accum) 303 if (accum)
229 { 304 {
230 acc += out_line[ox]; 305 acc += out_line[ox];
@@ -232,7 +307,7 @@ static bool scale_h_area(void *out_line_ptr,
232 out_line[ox] = acc; 307 out_line[ox] = acc;
233 /* reset accumulator */ 308 /* reset accumulator */
234 acc = 0; 309 acc = 0;
235 mul = ctx->bm->width - mul; 310 mul = oxe;
236 ox += 1; 311 ox += 1;
237 /* inside an area */ 312 /* inside an area */
238 } else { 313 } else {
@@ -249,56 +324,56 @@ static bool scale_h_area(void *out_line_ptr,
249/* vertical area average scaler */ 324/* vertical area average scaler */
250static inline bool scale_v_area(struct rowset *rset, struct scaler_context *ctx) 325static inline bool scale_v_area(struct rowset *rset, struct scaler_context *ctx)
251{ 326{
252 uint32_t mul, x, oy, iy, oye; 327 uint32_t mul, oy, iy, oye;
328#if defined(CPU_SH) || defined (TEST_SH_MATH)
329 const uint32_t v_i_val = ctx->src->height,
330 v_o_val = ctx->bm->height;
331#else
332 const uint32_t v_i_val = ctx->v_i_val,
333 v_o_val = ctx->v_o_val;
334#endif
253 335
254 /* Set up rounding and scale factors */ 336 /* Set up rounding and scale factors */
255 ctx->divisor *= ctx->src->height;
256 ctx->round = ctx->divisor >> 1;
257 ctx->divisor = 1 + (-((ctx->divisor + 1) >> 1)) / ctx->divisor;
258 mul = 0; 337 mul = 0;
259 oy = rset->rowstart; 338 oy = rset->rowstart;
260 oye = 0; 339 oye = 0;
261#ifdef HAVE_LCD_COLOR 340#ifdef HAVE_LCD_COLOR
262 uint32_t *rowacc = (uint32_t *) ctx->buf, 341 uint32_t *rowacc = (uint32_t *) ctx->buf,
263 *rowtmp = rowacc + 3 * ctx->bm->width; 342 *rowtmp = rowacc + 3 * ctx->bm->width,
343 *rowacc_px, *rowtmp_px;
264 memset((void *)ctx->buf, 0, ctx->bm->width * 2 * sizeof(struct uint32_rgb)); 344 memset((void *)ctx->buf, 0, ctx->bm->width * 2 * sizeof(struct uint32_rgb));
265#else 345#else
266 uint32_t *rowacc = (uint32_t *) ctx->buf, 346 uint32_t *rowacc = (uint32_t *) ctx->buf,
267 *rowtmp = rowacc + ctx->bm->width; 347 *rowtmp = rowacc + ctx->bm->width,
348 *rowacc_px, *rowtmp_px;
268 memset((void *)ctx->buf, 0, ctx->bm->width * 2 * sizeof(uint32_t)); 349 memset((void *)ctx->buf, 0, ctx->bm->width * 2 * sizeof(uint32_t));
269#endif 350#endif
270 SDEBUGF("scale_v_area\n"); 351 SDEBUGF("scale_v_area\n");
271 /* zero the accumulator and temp rows */ 352 /* zero the accumulator and temp rows */
272 for (iy = 0; iy < (unsigned int)ctx->src->height; iy++) 353 for (iy = 0; iy < (unsigned int)ctx->src->height; iy++)
273 { 354 {
274 oye += ctx->bm->height; 355 oye += v_o_val;
275 /* end of current area has been reached */ 356 /* end of current area has been reached */
276 if (oye >= (unsigned int)ctx->src->height) 357 if (oye >= v_i_val)
277 { 358 {
278 /* "reset" error, which now represents partial coverage of the next 359 /* "reset" error, which now represents partial coverage of the next
279 row by the next area 360 row by the next area
280 */ 361 */
281 oye -= ctx->src->height; 362 oye -= v_i_val;
282 /* add stored partial row to accumulator */ 363 /* add stored partial row to accumulator */
283#ifdef HAVE_LCD_COLOR 364 for(rowacc_px = rowacc, rowtmp_px = rowtmp; rowacc_px != rowtmp;
284 for (x = 0; x < 3 * (unsigned int)ctx->bm->width; x++) 365 rowacc_px++, rowtmp_px++)
285#else 366 *rowacc_px = *rowacc_px * v_o_val + *rowtmp_px * mul;
286 for (x = 0; x < (unsigned int)ctx->bm->width; x++)
287#endif
288 rowacc[x] = rowacc[x] * ctx->bm->height + mul * rowtmp[x];
289 /* store new scaled row in temp row */ 367 /* store new scaled row in temp row */
290 if(!ctx->h_scaler(rowtmp, ctx, false)) 368 if(!ctx->h_scaler(rowtmp, ctx, false))
291 return false; 369 return false;
292 /* add partial coverage by new row to this area, then round and 370 /* add partial coverage by new row to this area, then round and
293 scale to final value 371 scale to final value
294 */ 372 */
295 mul = ctx->bm->height - oye; 373 mul = v_o_val - oye;
296#ifdef HAVE_LCD_COLOR 374 for(rowacc_px = rowacc, rowtmp_px = rowtmp; rowacc_px != rowtmp;
297 for (x = 0; x < 3 * (unsigned int)ctx->bm->width; x++) 375 rowacc_px++, rowtmp_px++)
298#else 376 *rowacc_px += mul * *rowtmp_px;
299 for (x = 0; x < (unsigned int)ctx->bm->width; x++)
300#endif
301 rowacc[x] += mul * rowtmp[x];
302 ctx->output_row(oy, (void*)rowacc, ctx); 377 ctx->output_row(oy, (void*)rowacc, ctx);
303 /* clear accumulator row, store partial coverage for next row */ 378 /* clear accumulator row, store partial coverage for next row */
304#ifdef HAVE_LCD_COLOR 379#ifdef HAVE_LCD_COLOR
@@ -319,20 +394,18 @@ static inline bool scale_v_area(struct rowset *rset, struct scaler_context *ctx)
319} 394}
320 395
321#ifdef HAVE_UPSCALER 396#ifdef HAVE_UPSCALER
322/* Set up rounding and scale factors for the horizontal scaler. The divisor
323 is bm->width - 1, so that the first and last pixels in the row align
324 exactly between input and output
325*/
326static inline void scale_h_linear_setup(struct scaler_context *ctx)
327{
328 ctx->divisor = ctx->bm->width - 1;
329}
330
331/* horizontal linear scaler */ 397/* horizontal linear scaler */
332static bool scale_h_linear(void *out_line_ptr, struct scaler_context *ctx, 398static bool scale_h_linear(void *out_line_ptr, struct scaler_context *ctx,
333 bool accum) 399 bool accum)
334{ 400{
335 unsigned int ix, ox, ixe; 401 unsigned int ix, ox, ixe;
402#if defined(CPU_SH) || defined (TEST_SH_MATH)
403 const uint32_t h_i_val = ctx->src->width - 1,
404 h_o_val = ctx->bm->width - 1;
405#else
406 const uint32_t h_i_val = ctx->h_i_val,
407 h_o_val = ctx->h_o_val;
408#endif
336 /* type x = x is an ugly hack for hiding an unitialized data warning. The 409 /* type x = x is an ugly hack for hiding an unitialized data warning. The
337 values are conditionally initialized before use, but other values are 410 values are conditionally initialized before use, but other values are
338 set such that this will occur before these are used. 411 set such that this will occur before these are used.
@@ -348,27 +421,35 @@ static bool scale_h_linear(void *out_line_ptr, struct scaler_context *ctx,
348 FILL_BUF_INIT(part,ctx->store_part,ctx->args); 421 FILL_BUF_INIT(part,ctx->store_part,ctx->args);
349 ix = 0; 422 ix = 0;
350 /* The error is set so that values are initialized on the first pass. */ 423 /* The error is set so that values are initialized on the first pass. */
351 ixe = ctx->bm->width - 1; 424 ixe = h_o_val;
352 /* give other tasks a chance to run */ 425 /* give other tasks a chance to run */
353 yield(); 426 yield();
354 for (ox = 0; ox < (uint32_t)ctx->bm->width; ox++) 427 for (ox = 0; ox < (uint32_t)ctx->bm->width; ox++)
355 { 428 {
356#ifdef HAVE_LCD_COLOR 429#ifdef HAVE_LCD_COLOR
357 if (ixe >= ((uint32_t)ctx->bm->width - 1)) 430 if (ixe >= h_o_val)
358 { 431 {
359 /* Store the new "current" pixel value in rgbval, and the color 432 /* Store the new "current" pixel value in rgbval, and the color
360 step value in rgbinc. 433 step value in rgbinc.
361 */ 434 */
362 ixe -= (ctx->bm->width - 1); 435 ixe -= h_o_val;
363 rgbinc.r = -(part->buf->red); 436 rgbinc.r = -(part->buf->red);
364 rgbinc.g = -(part->buf->green); 437 rgbinc.g = -(part->buf->green);
365 rgbinc.b = -(part->buf->blue); 438 rgbinc.b = -(part->buf->blue);
366 rgbval.r = (part->buf->red) * (ctx->bm->width - 1); 439#if defined(CPU_COLDFIRE)
367 rgbval.g = (part->buf->green) * (ctx->bm->width - 1); 440/* Coldfire EMAC math */
368 rgbval.b = (part->buf->blue) * (ctx->bm->width - 1); 441 MAC(part->buf->red, h_o_val, 0);
442 MAC(part->buf->green, h_o_val, 1);
443 MAC(part->buf->blue, h_o_val, 2);
444#else
445/* generic C math */
446 rgbval.r = (part->buf->red) * h_o_val;
447 rgbval.g = (part->buf->green) * h_o_val;
448 rgbval.b = (part->buf->blue) * h_o_val;
449#endif /* CPU */
369 ix += 1; 450 ix += 1;
370 /* If this wasn't the last pixel, add the next one to rgbinc. */ 451 /* If this wasn't the last pixel, add the next one to rgbinc. */
371 if (ix < (uint32_t)ctx->src->width) { 452 if (LIKELY(ix < (uint32_t)ctx->src->width)) {
372 part->buf++; 453 part->buf++;
373 part->len--; 454 part->len--;
374 /* Fetch new pixels if needed */ 455 /* Fetch new pixels if needed */
@@ -379,14 +460,28 @@ static bool scale_h_linear(void *out_line_ptr, struct scaler_context *ctx,
379 /* Add a partial step to rgbval, in this pixel isn't precisely 460 /* Add a partial step to rgbval, in this pixel isn't precisely
380 aligned with the new source pixel 461 aligned with the new source pixel
381 */ 462 */
463#if defined(CPU_COLDFIRE)
464/* Coldfire EMAC math */
465 MAC(rgbinc.r, ixe, 0);
466 MAC(rgbinc.g, ixe, 1);
467 MAC(rgbinc.b, ixe, 2);
468#else
469/* generic C math */
382 rgbval.r += rgbinc.r * ixe; 470 rgbval.r += rgbinc.r * ixe;
383 rgbval.g += rgbinc.g * ixe; 471 rgbval.g += rgbinc.g * ixe;
384 rgbval.b += rgbinc.b * ixe; 472 rgbval.b += rgbinc.b * ixe;
473#endif
385 } 474 }
386 /* Now multiple the color increment to its proper value */ 475#if defined(CPU_COLDFIRE)
387 rgbinc.r *= ctx->src->width - 1; 476/* get final EMAC result out of ACC registers */
388 rgbinc.g *= ctx->src->width - 1; 477 MAC_OUT(rgbval.r, 0);
389 rgbinc.b *= ctx->src->width - 1; 478 MAC_OUT(rgbval.g, 1);
479 MAC_OUT(rgbval.b, 2);
480#endif
481 /* Now multiply the color increment to its proper value */
482 rgbinc.r *= h_i_val;
483 rgbinc.g *= h_i_val;
484 rgbinc.b *= h_i_val;
390 } else { 485 } else {
391 rgbval.r += rgbinc.r; 486 rgbval.r += rgbinc.r;
392 rgbval.g += rgbinc.g; 487 rgbval.g += rgbinc.g;
@@ -395,27 +490,36 @@ static bool scale_h_linear(void *out_line_ptr, struct scaler_context *ctx,
395 /* round and scale values, and accumulate or store to output */ 490 /* round and scale values, and accumulate or store to output */
396 if (accum) 491 if (accum)
397 { 492 {
398 out_line[ox].r += rgbval.r; 493 out_line[ox].r += (rgbval.r + (1 << 21)) >> 22;
399 out_line[ox].g += rgbval.g; 494 out_line[ox].g += (rgbval.g + (1 << 21)) >> 22;
400 out_line[ox].b += rgbval.b; 495 out_line[ox].b += (rgbval.b + (1 << 21)) >> 22;
401 } else { 496 } else {
402 out_line[ox].r = rgbval.r; 497 out_line[ox].r = (rgbval.r + (1 << 21)) >> 22;
403 out_line[ox].g = rgbval.g; 498 out_line[ox].g = (rgbval.g + (1 << 21)) >> 22;
404 out_line[ox].b = rgbval.b; 499 out_line[ox].b = (rgbval.b + (1 << 21)) >> 22;
405 } 500 }
406#else 501#else
407 if (ixe >= ((uint32_t)ctx->bm->width - 1)) 502 if (ixe >= h_o_val)
408 { 503 {
409 /* Store the new "current" pixel value in rgbval, and the color 504 /* Store the new "current" pixel value in rgbval, and the color
410 step value in rgbinc. 505 step value in rgbinc.
411 */ 506 */
412 ixe -= (ctx->bm->width - 1); 507 ixe -= h_o_val;
413 val = *(part->buf); 508 val = *(part->buf);
414 inc = -val; 509 inc = -val;
415 val = MULUQ(val, ctx->bm->width - 1); 510#if defined(CPU_COLDFIRE)
511/* Coldfire EMAC math */
512 MAC(val, h_o_val, 0);
513#elif defined(CPU_SH)
514/* SH-1 16x16->32 math */
515 val = mul_u16_u16(val, h_o_val);
516#else
517/* generic C math */
518 val = val * h_o_val;
519#endif
416 ix += 1; 520 ix += 1;
417 /* If this wasn't the last pixel, add the next one to rgbinc. */ 521 /* If this wasn't the last pixel, add the next one to rgbinc. */
418 if (ix < (uint32_t)ctx->src->width) { 522 if (LIKELY(ix < (uint32_t)ctx->src->width)) {
419 part->buf++; 523 part->buf++;
420 part->len--; 524 part->len--;
421 /* Fetch new pixels if needed */ 525 /* Fetch new pixels if needed */
@@ -424,12 +528,40 @@ static bool scale_h_linear(void *out_line_ptr, struct scaler_context *ctx,
424 /* Add a partial step to rgbval, in this pixel isn't precisely 528 /* Add a partial step to rgbval, in this pixel isn't precisely
425 aligned with the new source pixel 529 aligned with the new source pixel
426 */ 530 */
427 val += MULQ(inc, ixe); 531#if defined(CPU_COLDFIRE)
532/* Coldfire EMAC math */
533 MAC(inc, ixe, 0);
534#elif defined(CPU_SH)
535/* SH-1 16x16->32 math */
536 val += mul_s16_s16(inc, ixe);
537#else
538/* generic C math */
539 val += inc * ixe;
540#endif
428 } 541 }
542#if defined(CPU_COLDFIRE)
543/* get final EMAC result out of ACC register */
544 MAC_OUT(val, 0);
545#endif
429 /* Now multiply the color increment to its proper value */ 546 /* Now multiply the color increment to its proper value */
430 inc = MULQ(inc, ctx->src->width - 1); 547#if defined(CPU_SH)
548/* SH-1 16x16->32 math */
549 inc = mul_s16_s16(inc, h_i_val);
550#else
551/* generic C math */
552 inc *= h_i_val;
553#endif
431 } else 554 } else
432 val += inc; 555 val += inc;
556#if !(defined(CPU_SH) || defined(TEST_SH_MATH))
557 /* round and scale values, and accumulate or store to output */
558 if (accum)
559 {
560 out_line[ox] += (val + (1 << 21)) >> 22;
561 } else {
562 out_line[ox] = (val + (1 << 21)) >> 22;
563 }
564#else
433 /* round and scale values, and accumulate or store to output */ 565 /* round and scale values, and accumulate or store to output */
434 if (accum) 566 if (accum)
435 { 567 {
@@ -438,7 +570,8 @@ static bool scale_h_linear(void *out_line_ptr, struct scaler_context *ctx,
438 out_line[ox] = val; 570 out_line[ox] = val;
439 } 571 }
440#endif 572#endif
441 ixe += ctx->src->width - 1; 573#endif
574 ixe += h_i_val;
442 } 575 }
443 return true; 576 return true;
444} 577}
@@ -447,71 +580,66 @@ static bool scale_h_linear(void *out_line_ptr, struct scaler_context *ctx,
447static inline bool scale_v_linear(struct rowset *rset, 580static inline bool scale_v_linear(struct rowset *rset,
448 struct scaler_context *ctx) 581 struct scaler_context *ctx)
449{ 582{
450 uint32_t mul, x, iy, iye; 583 uint32_t mul, iy, iye;
451 int32_t oy; 584 int32_t oy;
452 /* Set up scale and rounding factors, the divisor is bm->height - 1 */ 585#if defined(CPU_SH) || defined (TEST_SH_MATH)
453 ctx->divisor *= (ctx->bm->height - 1); 586 const uint32_t v_i_val = ctx->src->height - 1,
454 ctx->round = ctx->divisor >> 1; 587 v_o_val = ctx->bm->height - 1;
455 ctx->divisor = 1 + (-((ctx->divisor + 1) >> 1)) / ctx->divisor; 588#else
456 /* Set up our two temp buffers. The names are generic because they'll be 589 const uint32_t v_i_val = ctx->v_i_val,
457 swapped each time a new input row is read 590 v_o_val = ctx->v_o_val;
591#endif
592 /* Set up our buffers, to store the increment and current value for each
593 column, and one temp buffer used to read in new rows.
458 */ 594 */
459#ifdef HAVE_LCD_COLOR 595#ifdef HAVE_LCD_COLOR
460 uint32_t *rowinc = (uint32_t *)(ctx->buf), 596 uint32_t *rowinc = (uint32_t *)(ctx->buf),
461 *rowval = rowinc + 3 * ctx->bm->width, 597 *rowval = rowinc + 3 * ctx->bm->width,
462 *rowtmp = rowval + 3 * ctx->bm->width; 598 *rowtmp = rowval + 3 * ctx->bm->width,
463#else 599#else
464 uint32_t *rowinc = (uint32_t *)(ctx->buf), 600 uint32_t *rowinc = (uint32_t *)(ctx->buf),
465 *rowval = rowinc + ctx->bm->width, 601 *rowval = rowinc + ctx->bm->width,
466 *rowtmp = rowval + ctx->bm->width; 602 *rowtmp = rowval + ctx->bm->width,
467#endif 603#endif
604 *rowinc_px, *rowval_px, *rowtmp_px;
468 605
469 SDEBUGF("scale_v_linear\n"); 606 SDEBUGF("scale_v_linear\n");
470 mul = 0; 607 mul = 0;
471 iy = 0; 608 iy = 0;
472 iye = ctx->bm->height - 1; 609 iye = v_o_val;
473 /* get first scaled row in rowtmp */ 610 /* get first scaled row in rowtmp */
474 if(!ctx->h_scaler((void*)rowtmp, ctx, false)) 611 if(!ctx->h_scaler((void*)rowtmp, ctx, false))
475 return false; 612 return false;
476 for (oy = rset->rowstart; oy != rset->rowstop; oy += rset->rowstep) 613 for (oy = rset->rowstart; oy != rset->rowstop; oy += rset->rowstep)
477 { 614 {
478 if (iye >= (uint32_t)ctx->bm->height - 1) 615 if (iye >= v_o_val)
479 { 616 {
480 iye -= ctx->bm->height - 1; 617 iye -= v_o_val;
481 iy += 1; 618 iy += 1;
482#ifdef HAVE_LCD_COLOR 619 for(rowinc_px = rowinc, rowtmp_px = rowtmp, rowval_px = rowval;
483 for (x = 0; x < 3 * (uint32_t)ctx->bm->width; x++) 620 rowinc_px < rowval; rowinc_px++, rowtmp_px++, rowval_px++)
484#else
485 for (x = 0; x < (uint32_t)ctx->bm->width; x++)
486#endif
487 { 621 {
488 rowinc[x] = -rowtmp[x]; 622 *rowinc_px = -*rowtmp_px;
489 rowval[x] = rowtmp[x] * (ctx->bm->height - 1); 623 *rowval_px = *rowtmp_px * v_o_val;
490 } 624 }
491 if (iy < (uint32_t)ctx->src->height) 625 if (iy < (uint32_t)ctx->src->height)
492 { 626 {
493 if (!ctx->h_scaler((void*)rowtmp, ctx, false)) 627 if (!ctx->h_scaler((void*)rowtmp, ctx, false))
494 return false; 628 return false;
495#ifdef HAVE_LCD_COLOR 629 for(rowinc_px = rowinc, rowtmp_px = rowtmp, rowval_px = rowval;
496 for (x = 0; x < 3 * (uint32_t)ctx->bm->width; x++) 630 rowinc_px < rowval; rowinc_px++, rowtmp_px++, rowval_px++)
497#else
498 for (x = 0; x < (uint32_t)ctx->bm->width; x++)
499#endif
500 { 631 {
501 rowinc[x] += rowtmp[x]; 632 *rowinc_px += *rowtmp_px;
502 rowval[x] += rowinc[x] * iye; 633 *rowval_px += *rowinc_px * iye;
503 rowinc[x] *= ctx->src->height - 1; 634 *rowinc_px *= v_i_val;
504 } 635 }
505 } 636 }
506 } else 637 } else
507#ifdef HAVE_LCD_COLOR 638 for(rowinc_px = rowinc, rowval_px = rowval; rowinc_px < rowval;
508 for (x = 0; x < 3 * (uint32_t)ctx->bm->width; x++) 639 rowinc_px++, rowval_px++)
509#else 640 *rowval_px += *rowinc_px;
510 for (x = 0; x < (uint32_t)ctx->bm->width; x++)
511#endif
512 rowval[x] += rowinc[x];
513 ctx->output_row(oy, (void*)rowval, ctx); 641 ctx->output_row(oy, (void*)rowval, ctx);
514 iye += ctx->src->height - 1; 642 iye += v_i_val;
515 } 643 }
516 return true; 644 return true;
517} 645}
@@ -533,9 +661,9 @@ static void output_row_32_native_fromyuv(uint32_t row, void * row_in,
533 for (col = 0; col < ctx->bm->width; col++) { 661 for (col = 0; col < ctx->bm->width; col++) {
534 if (ctx->dither) 662 if (ctx->dither)
535 delta = DITHERXDY(col,dy); 663 delta = DITHERXDY(col,dy);
536 y = SC_MUL(qp->b + ctx->round, ctx->divisor); 664 y = SC_OUT(qp->b, ctx);
537 u = SC_MUL(qp->g + ctx->round, ctx->divisor); 665 u = SC_OUT(qp->g, ctx);
538 v = SC_MUL(qp->r + ctx->round, ctx->divisor); 666 v = SC_OUT(qp->r, ctx);
539 qp++; 667 qp++;
540 yuv_to_rgb(y, u, v, &r, &g, &b); 668 yuv_to_rgb(y, u, v, &r, &g, &b);
541 r = (31 * r + (r >> 3) + delta) >> 8; 669 r = (31 * r + (r >> 3) + delta) >> 8;
@@ -571,7 +699,7 @@ static void output_row_32_native(uint32_t row, void * row_in,
571 for (col = 0; col < ctx->bm->width; col++) { 699 for (col = 0; col < ctx->bm->width; col++) {
572 if (ctx->dither) 700 if (ctx->dither)
573 delta = DITHERXDY(col,dy); 701 delta = DITHERXDY(col,dy);
574 bright = SC_MUL((*qp++) + ctx->round,ctx->divisor); 702 bright = SC_OUT(*qp++, ctx);
575 bright = (3 * bright + (bright >> 6) + delta) >> 8; 703 bright = (3 * bright + (bright >> 6) + delta) >> 8;
576 data |= (~bright & 3) << shift; 704 data |= (~bright & 3) << shift;
577 shift -= 2; 705 shift -= 2;
@@ -594,7 +722,7 @@ static void output_row_32_native(uint32_t row, void * row_in,
594 for (col = 0; col < ctx->bm->width; col++) { 722 for (col = 0; col < ctx->bm->width; col++) {
595 if (ctx->dither) 723 if (ctx->dither)
596 delta = DITHERXDY(col,dy); 724 delta = DITHERXDY(col,dy);
597 bright = SC_MUL((*qp++) + ctx->round, ctx->divisor); 725 bright = SC_OUT(*qp++, ctx);
598 bright = (3 * bright + (bright >> 6) + delta) >> 8; 726 bright = (3 * bright + (bright >> 6) + delta) >> 8;
599 *dest++ |= (~bright & 3) << shift; 727 *dest++ |= (~bright & 3) << shift;
600 } 728 }
@@ -609,7 +737,7 @@ static void output_row_32_native(uint32_t row, void * row_in,
609 for (col = 0; col < ctx->bm->width; col++) { 737 for (col = 0; col < ctx->bm->width; col++) {
610 if (ctx->dither) 738 if (ctx->dither)
611 delta = DITHERXDY(col,dy); 739 delta = DITHERXDY(col,dy);
612 bright = SC_MUL((*qp++) + ctx->round, ctx->divisor); 740 bright = SC_OUT(*qp++, ctx);
613 bright = (3 * bright + (bright >> 6) + delta) >> 8; 741 bright = (3 * bright + (bright >> 6) + delta) >> 8;
614 *dest++ |= vi_pattern[bright] << shift; 742 *dest++ |= vi_pattern[bright] << shift;
615 } 743 }
@@ -625,9 +753,9 @@ static void output_row_32_native(uint32_t row, void * row_in,
625 if (ctx->dither) 753 if (ctx->dither)
626 delta = DITHERXDY(col,dy); 754 delta = DITHERXDY(col,dy);
627 q0 = *qp++; 755 q0 = *qp++;
628 r = SC_MUL(q0.r + ctx->round, ctx->divisor); 756 r = SC_OUT(q0.r, ctx);
629 g = SC_MUL(q0.g + ctx->round, ctx->divisor); 757 g = SC_OUT(q0.g, ctx);
630 b = SC_MUL(q0.b + ctx->round, ctx->divisor); 758 b = SC_OUT(q0.b, ctx);
631 r = (31 * r + (r >> 3) + delta) >> 8; 759 r = (31 * r + (r >> 3) + delta) >> 8;
632 g = (63 * g + (g >> 2) + delta) >> 8; 760 g = (63 * g + (g >> 2) + delta) >> 8;
633 b = (31 * b + (b >> 3) + delta) >> 8; 761 b = (31 * b + (b >> 3) + delta) >> 8;
@@ -664,13 +792,10 @@ int resize_on_load(struct bitmap *bm, bool dither, struct dim *src,
664 struct img_part* (*store_part)(void *args), 792 struct img_part* (*store_part)(void *args),
665 void *args) 793 void *args)
666{ 794{
667
668#ifdef HAVE_UPSCALER
669 const int sw = src->width; 795 const int sw = src->width;
670 const int sh = src->height; 796 const int sh = src->height;
671 const int dw = bm->width; 797 const int dw = bm->width;
672 const int dh = bm->height; 798 const int dh = bm->height;
673#endif
674 int ret; 799 int ret;
675#ifdef HAVE_LCD_COLOR 800#ifdef HAVE_LCD_COLOR
676 unsigned int needed = sizeof(struct uint32_rgb) * 3 * bm->width; 801 unsigned int needed = sizeof(struct uint32_rgb) * 3 * bm->width;
@@ -721,6 +846,9 @@ int resize_on_load(struct bitmap *bm, bool dither, struct dim *src,
721 ctx.bm = bm; 846 ctx.bm = bm;
722 ctx.src = src; 847 ctx.src = src;
723 ctx.dither = dither; 848 ctx.dither = dither;
849#if defined(CPU_SH) || defined (TEST_SH_MATH)
850 uint32_t div;
851#endif
724#if !defined(PLUGIN) 852#if !defined(PLUGIN)
725#if defined(HAVE_LCD_COLOR) && defined(HAVE_JPEG) 853#if defined(HAVE_LCD_COLOR) && defined(HAVE_JPEG)
726 ctx.output_row = format_index ? output_row_32_native_fromyuv 854 ctx.output_row = format_index ? output_row_32_native_fromyuv
@@ -740,23 +868,56 @@ int resize_on_load(struct bitmap *bm, bool dither, struct dim *src,
740 { 868 {
741#endif 869#endif
742 ctx.h_scaler = scale_h_area; 870 ctx.h_scaler = scale_h_area;
743 scale_h_area_setup(&ctx); 871#if defined(CPU_SH) || defined (TEST_SH_MATH)
872 div = sw;
873#else
874 uint32_t h_div = (1U << 24) / sw;
875 ctx.h_i_val = sw * h_div;
876 ctx.h_o_val = dw * h_div;
877#endif
744#ifdef HAVE_UPSCALER 878#ifdef HAVE_UPSCALER
745 } else { 879 } else {
746 ctx.h_scaler = scale_h_linear; 880 ctx.h_scaler = scale_h_linear;
747 scale_h_linear_setup(&ctx); 881#if defined(CPU_SH) || defined (TEST_SH_MATH)
882 div = dw - 1;
883#else
884 uint32_t h_div = (1U << 24) / (dw - 1);
885 ctx.h_i_val = (sw - 1) * h_div;
886 ctx.h_o_val = (dw - 1) * h_div;
887#endif
748 } 888 }
749#endif 889#endif
750 SC_MUL_INIT; 890#ifdef CPU_COLDFIRE
891 coldfire_set_macsr(EMAC_UNSIGNED);
892#endif
751#ifdef HAVE_UPSCALER 893#ifdef HAVE_UPSCALER
752 if (sh > dh) 894 if (sh > dh)
753#endif 895#endif
896 {
897#if defined(CPU_SH) || defined (TEST_SH_MATH)
898 div *= sh;
899 ctx.recip = ((uint32_t)(-div)) / div + 1;
900#else
901 uint32_t v_div = (1U << 22) / sh;
902 ctx.v_i_val = sh * v_div;
903 ctx.v_o_val = dh * v_div;
904#endif
754 ret = scale_v_area(rset, &ctx); 905 ret = scale_v_area(rset, &ctx);
906 }
755#ifdef HAVE_UPSCALER 907#ifdef HAVE_UPSCALER
756 else 908 else
909 {
910#if defined(CPU_SH) || defined (TEST_SH_MATH)
911 div *= dh - 1;
912 ctx.recip = ((uint32_t)(-div)) / div + 1;
913#else
914 uint32_t v_div = (1U << 22) / dh;
915 ctx.v_i_val = (sh - 1) * v_div;
916 ctx.v_o_val = (dh - 1) * v_div;
917#endif
757 ret = scale_v_linear(rset, &ctx); 918 ret = scale_v_linear(rset, &ctx);
919 }
758#endif 920#endif
759 SC_MUL_END;
760#ifdef HAVE_ADJUSTABLE_CPU_FREQ 921#ifdef HAVE_ADJUSTABLE_CPU_FREQ
761 cpu_boost(false); 922 cpu_boost(false);
762#endif 923#endif
diff --git a/apps/recorder/resize.h b/apps/recorder/resize.h
index 2964fcd2a9..ef32066a0d 100644
--- a/apps/recorder/resize.h
+++ b/apps/recorder/resize.h
@@ -43,67 +43,61 @@
43#define MAX_SC_STACK_ALLOC 0 43#define MAX_SC_STACK_ALLOC 0
44#define HAVE_UPSCALER 1 44#define HAVE_UPSCALER 1
45 45
46#if defined(CPU_COLDFIRE) 46#if defined(CPU_SH)
47#define SC_MUL_INIT \ 47/* perform 32x32->40 unsigned multiply, round off and return top 8 bits */
48 unsigned long macsr_st = coldfire_get_macsr(); \ 48static inline uint32_t sc_mul_u32_rnd(uint32_t m, uint32_t n)
49 coldfire_set_macsr(EMAC_UNSIGNED);
50#define SC_MUL_END coldfire_set_macsr(macsr_st);
51#define SC_MUL(x, y) \
52({ \
53 unsigned long t; \
54 asm ("mac.l %[a], %[b], %%acc0\n\t" \
55 "move.l %%accext01, %[t]\n\t" \
56 "move.l #0, %%acc0\n\t" \
57 : [t] "=r" (t) : [a] "r" (x), [b] "r" (y)); \
58 t; \
59})
60#elif (CONFIG_CPU == SH7034)
61/* multiply two unsigned 32 bit values and return the top 32 bit
62 * of the 64 bit result */
63static inline unsigned sc_mul32(unsigned a, unsigned b)
64{ 49{
65 unsigned r, t1, t2, t3; 50 unsigned r, t1, t2, t3;
66 51 unsigned h = 1 << 15;
52 /* notation:
53 m = ab, n = cd
54 final result is (((a *c) << 32) + ((b * c + a * d) << 16) + b * d +
55 (1 << 31)) >> 32
56 */
67 asm ( 57 asm (
68 "swap.w %[a], %[t1] \n" /* t1 = ba */ 58 "swap.w %[m], %[t1]\n\t" /* t1 = ba */
69 "mulu %[t1], %[b] \n" /* a * d */ 59 "mulu %[m], %[n]\n\t" /* b * d */
70 "swap.w %[b], %[t3] \n" /* t3 = dc */ 60 "swap.w %[n], %[t3]\n\t" /* t3 = dc */
71 "sts macl, %[t2] \n" /* t2 = a * d */ 61 "sts macl, %[r]\n\t" /* r = b * d */
72 "mulu %[t1], %[t3] \n" /* a * c */ 62 "mulu %[m], %[t3]\n\t" /* b * c */
73 "sts macl, %[r] \n" /* hi = a * c */ 63 "shlr16 %[r]\n\t"
74 "mulu %[a], %[t3] \n" /* b * c */ 64 "sts macl, %[t2]\n\t" /* t2 = b * c */
75 "clrt \n" 65 "mulu %[t1], %[t3]\n\t" /* a * c */
76 "sts macl, %[t3] \n" /* t3 = b * c */ 66 "add %[t2], %[r]\n\t"
77 "addc %[t2], %[t3] \n" /* t3 += t2, carry -> t2 */ 67 "sts macl, %[t3]\n\t" /* t3 = a * c */
78 "movt %[t2] \n" 68 "mulu %[t1], %[n]\n\t" /* a * d */
79 "mulu %[a], %[b] \n" /* b * d */ 69 "shll16 %[t3]\n\t"
80 "mov %[t3], %[t1] \n" /* t1t3 = t2t3 << 16 */ 70 "sts macl, %[t2]\n\t" /* t2 = a * d */
81 "xtrct %[t2], %[t1] \n" 71 "add %[t2], %[r]\n\t"
82 "shll16 %[t3] \n" 72 "add %[t3], %[r]\n\t" /* r = ((b * d) >> 16) + (b * c + a * d) +
83 "sts macl, %[t2] \n" /* lo = b * d */ 73 ((a * c) << 16) */
84 "clrt \n" /* hi.lo += t1t3 */ 74 "add %[h], %[r]\n\t" /* round result */
85 "addc %[t3], %[t2] \n" 75 "shlr16 %[r]\n\t" /* truncate result */
86 "addc %[t1], %[r] \n"
87 : /* outputs */ 76 : /* outputs */
88 [r] "=&r"(r), 77 [r] "=&r"(r),
89 [t1]"=&r"(t1), 78 [t1]"=&r"(t1),
90 [t2]"=&r"(t2), 79 [t2]"=&r"(t2),
91 [t3]"=&r"(t3) 80 [t3]"=&r"(t3)
92 : /* inputs */ 81 : /* inputs */
93 [a] "r" (a), 82 [h] "r" (h),
94 [b] "r" (b) 83 [m] "r" (m),
84 [n] "r" (n)
95 ); 85 );
96 return r; 86 return r;
97} 87}
98#define SC_MUL(x, y) sc_mul32(x, y) 88#elif defined(TEST_SH_MATH)
99#define SC_MUL_INIT 89static inline uint32_t sc_mul_u32_rnd(uint32_t op1, uint32_t op2)
100#define SC_MUL_END 90{
91 uint64_t tmp = (uint64_t)op1 * op2;
92 tmp += 1LU << 31;
93 tmp >>= 32;
94 return tmp;
95}
96#else
97#define SC_OUT(n, c) (((n) + (1 << 23)) >> 24)
101#endif 98#endif
102 99#ifndef SC_OUT
103#ifndef SC_MUL 100#define SC_OUT(n, c) (sc_mul_u32_rnd(n, (c)->recip))
104#define SC_MUL(x, y) ((x) * (uint64_t)(y) >> 32)
105#define SC_MUL_INIT
106#define SC_MUL_END
107#endif 101#endif
108 102
109struct img_part { 103struct img_part {
@@ -130,8 +124,14 @@ struct uint32_rgb {
130 horizontal scaler, and row output 124 horizontal scaler, and row output
131*/ 125*/
132struct scaler_context { 126struct scaler_context {
133 uint32_t divisor; 127#if defined(CPU_SH) || defined(TEST_SH_MATH)
134 uint32_t round; 128 uint32_t recip;
129#else
130 uint32_t h_i_val;
131 uint32_t h_o_val;
132 uint32_t v_i_val;
133 uint32_t v_o_val;
134#endif
135 struct bitmap *bm; 135 struct bitmap *bm;
136 struct dim *src; 136 struct dim *src;
137 unsigned char *buf; 137 unsigned char *buf;