diff options
author | Andrew Mahone <andrew.mahone@gmail.com> | 2009-05-26 20:00:47 +0000 |
---|---|---|
committer | Andrew Mahone <andrew.mahone@gmail.com> | 2009-05-26 20:00:47 +0000 |
commit | 92785b8f2f20b0fc16de7e771e5eb55fd8497ff8 (patch) | |
tree | 3af2399c1e1be8e56cb1b5e6787efd738dad6d52 /apps | |
parent | c4ed88f59302882749023268ac456c415a4b1243 (diff) | |
download | rockbox-92785b8f2f20b0fc16de7e771e5eb55fd8497ff8.tar.gz rockbox-92785b8f2f20b0fc16de7e771e5eb55fd8497ff8.zip |
Use pre-multiplication in scaler to save one multiply per color component on ARM and Coldfire, at the cost of an extra add/shift in the horizontal scaler to reduce values to a workable range. SH-1 retains the same basic math, as
the use of 16x16->32 hardware multiplication in the earlier scaler stages saves more than removing the 32x32->40 multiply to descale output.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@21091 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps')
-rw-r--r-- | apps/plugins/bench_scaler.c | 4 | ||||
-rw-r--r-- | apps/plugins/lib/grey_draw.c | 2 | ||||
-rw-r--r-- | apps/plugins/pictureflow/pictureflow.c | 49 | ||||
-rw-r--r-- | apps/recorder/resize.c | 409 | ||||
-rw-r--r-- | apps/recorder/resize.h | 100 |
5 files changed, 354 insertions, 210 deletions
diff --git a/apps/plugins/bench_scaler.c b/apps/plugins/bench_scaler.c index c24807dad6..246271dcc8 100644 --- a/apps/plugins/bench_scaler.c +++ b/apps/plugins/bench_scaler.c | |||
@@ -49,8 +49,8 @@ static void output_row_null(uint32_t row, void * row_in, | |||
49 | #else | 49 | #else |
50 | uint32_t *lim = in + ctx->bm->width; | 50 | uint32_t *lim = in + ctx->bm->width; |
51 | #endif | 51 | #endif |
52 | for (; in < lim; in++) | 52 | while (in < lim) |
53 | output = SC_MUL(*in + ctx->round, ctx->divisor); | 53 | output = SC_OUT(*in++, ctx); |
54 | return; | 54 | return; |
55 | } | 55 | } |
56 | 56 | ||
diff --git a/apps/plugins/lib/grey_draw.c b/apps/plugins/lib/grey_draw.c index 6315ad9b1a..c1e6376cfe 100644 --- a/apps/plugins/lib/grey_draw.c +++ b/apps/plugins/lib/grey_draw.c | |||
@@ -733,7 +733,7 @@ static void output_row_grey_32(uint32_t row, void * row_in, | |||
733 | uint32_t *qp = (uint32_t*)row_in; | 733 | uint32_t *qp = (uint32_t*)row_in; |
734 | uint8_t *dest = (uint8_t*)ctx->bm->data + ctx->bm->width * row; | 734 | uint8_t *dest = (uint8_t*)ctx->bm->data + ctx->bm->width * row; |
735 | for (col = 0; col < ctx->bm->width; col++) | 735 | for (col = 0; col < ctx->bm->width; col++) |
736 | *dest++ = SC_MUL((*qp++) + ctx->round,ctx->divisor); | 736 | *dest++ = SC_OUT(*qp++, ctx); |
737 | } | 737 | } |
738 | 738 | ||
739 | static unsigned int get_size_grey(struct bitmap *bm) | 739 | static unsigned int get_size_grey(struct bitmap *bm) |
diff --git a/apps/plugins/pictureflow/pictureflow.c b/apps/plugins/pictureflow/pictureflow.c index a1ad3d2776..bbe2541681 100644 --- a/apps/plugins/pictureflow/pictureflow.c +++ b/apps/plugins/pictureflow/pictureflow.c | |||
@@ -592,25 +592,12 @@ static inline PFreal fcos(int iangle) | |||
592 | return fsin(iangle + (IANGLE_MAX >> 2)); | 592 | return fsin(iangle + (IANGLE_MAX >> 2)); |
593 | } | 593 | } |
594 | 594 | ||
595 | static inline uint32_t div255(uint32_t val) | 595 | static inline unsigned scale_val(unsigned val, unsigned bits) |
596 | { | 596 | { |
597 | return ((((val >> 8) + val) >> 8) + val) >> 8; | 597 | val = val * ((1 << bits) - 1); |
598 | return ((val >> 8) + val + 128) >> 8; | ||
598 | } | 599 | } |
599 | 600 | ||
600 | #define SCALE_VAL(val,out) div255((val) * (out) + 127) | ||
601 | #define SCALE_VAL32(val, out) \ | ||
602 | ({ \ | ||
603 | uint32_t val__ = (val) * (out); \ | ||
604 | val__ = ((((val__ >> 8) + val__) >> 8) + val__ + 128) >> 8; \ | ||
605 | val__; \ | ||
606 | }) | ||
607 | #define SCALE_VAL8(val, out) \ | ||
608 | ({ \ | ||
609 | unsigned val__ = (val) * (out); \ | ||
610 | val__ = ((val__ >> 8) + val__ + 128) >> 8; \ | ||
611 | val__; \ | ||
612 | }) | ||
613 | |||
614 | static void output_row_8_transposed(uint32_t row, void * row_in, | 601 | static void output_row_8_transposed(uint32_t row, void * row_in, |
615 | struct scaler_context *ctx) | 602 | struct scaler_context *ctx) |
616 | { | 603 | { |
@@ -625,9 +612,9 @@ static void output_row_8_transposed(uint32_t row, void * row_in, | |||
625 | unsigned r, g, b; | 612 | unsigned r, g, b; |
626 | for (; dest < end; dest += ctx->bm->height) | 613 | for (; dest < end; dest += ctx->bm->height) |
627 | { | 614 | { |
628 | r = SCALE_VAL8(qp->red, 31); | 615 | r = scale_val(qp->red, 5); |
629 | g = SCALE_VAL8(qp->green, 63); | 616 | g = scale_val(qp->green, 6); |
630 | b = SCALE_VAL8((qp++)->blue, 31); | 617 | b = scale_val((qp++)->blue, 5); |
631 | *dest = LCD_RGBPACK_LCD(r,g,b); | 618 | *dest = LCD_RGBPACK_LCD(r,g,b); |
632 | } | 619 | } |
633 | #endif | 620 | #endif |
@@ -641,19 +628,15 @@ static void output_row_32_transposed(uint32_t row, void * row_in, | |||
641 | #ifdef USEGSLIB | 628 | #ifdef USEGSLIB |
642 | uint32_t *qp = (uint32_t*)row_in; | 629 | uint32_t *qp = (uint32_t*)row_in; |
643 | for (; dest < end; dest += ctx->bm->height) | 630 | for (; dest < end; dest += ctx->bm->height) |
644 | *dest = SC_MUL((*qp++) + ctx->round, ctx->divisor); | 631 | *dest = SC_OUT(*qp++, ctx); |
645 | #else | 632 | #else |
646 | struct uint32_rgb *qp = (struct uint32_rgb*)row_in; | 633 | struct uint32_rgb *qp = (struct uint32_rgb*)row_in; |
647 | uint32_t rb_mul = SCALE_VAL32(ctx->divisor, 31), | ||
648 | rb_rnd = SCALE_VAL32(ctx->round, 31), | ||
649 | g_mul = SCALE_VAL32(ctx->divisor, 63), | ||
650 | g_rnd = SCALE_VAL32(ctx->round, 63); | ||
651 | int r, g, b; | 634 | int r, g, b; |
652 | for (; dest < end; dest += ctx->bm->height) | 635 | for (; dest < end; dest += ctx->bm->height) |
653 | { | 636 | { |
654 | r = SC_MUL(qp->r + rb_rnd, rb_mul); | 637 | r = scale_val(SC_OUT(qp->r, ctx), 5); |
655 | g = SC_MUL(qp->g + g_rnd, g_mul); | 638 | g = scale_val(SC_OUT(qp->g, ctx), 6); |
656 | b = SC_MUL(qp->b + rb_rnd, rb_mul); | 639 | b = scale_val(SC_OUT(qp->b, ctx), 5); |
657 | qp++; | 640 | qp++; |
658 | *dest = LCD_RGBPACK_LCD(r,g,b); | 641 | *dest = LCD_RGBPACK_LCD(r,g,b); |
659 | } | 642 | } |
@@ -670,14 +653,14 @@ static void output_row_32_transposed_fromyuv(uint32_t row, void * row_in, | |||
670 | for (; dest < end; dest += ctx->bm->height) | 653 | for (; dest < end; dest += ctx->bm->height) |
671 | { | 654 | { |
672 | unsigned r, g, b, y, u, v; | 655 | unsigned r, g, b, y, u, v; |
673 | y = SC_MUL(qp->b + ctx->round, ctx->divisor); | 656 | y = SC_OUT(qp->b, ctx); |
674 | u = SC_MUL(qp->g + ctx->round, ctx->divisor); | 657 | u = SC_OUT(qp->g, ctx); |
675 | v = SC_MUL(qp->r + ctx->round, ctx->divisor); | 658 | v = SC_OUT(qp->r, ctx); |
676 | qp++; | 659 | qp++; |
677 | yuv_to_rgb(y, u, v, &r, &g, &b); | 660 | yuv_to_rgb(y, u, v, &r, &g, &b); |
678 | r = (31 * r + (r >> 3) + 127) >> 8; | 661 | r = scale_val(r, 5); |
679 | g = (63 * g + (g >> 2) + 127) >> 8; | 662 | g = scale_val(g, 6); |
680 | b = (31 * b + (b >> 3) + 127) >> 8; | 663 | b = scale_val(b, 5); |
681 | *dest = LCD_RGBPACK_LCD(r, g, b); | 664 | *dest = LCD_RGBPACK_LCD(r, g, b); |
682 | } | 665 | } |
683 | } | 666 | } |
diff --git a/apps/recorder/resize.c b/apps/recorder/resize.c index 1e9210e819..3a0ad8d75b 100644 --- a/apps/recorder/resize.c +++ b/apps/recorder/resize.c | |||
@@ -131,20 +131,45 @@ int recalc_dimension(struct dim *dst, struct dim *src) | |||
131 | return false; \ | 131 | return false; \ |
132 | } | 132 | } |
133 | 133 | ||
134 | /* Set up rounding and scale factors for horizontal area scaler */ | 134 | #if defined(CPU_COLDFIRE) |
135 | static inline void scale_h_area_setup(struct scaler_context *ctx) | 135 | #define MAC(op1, op2, num) \ |
136 | asm volatile( \ | ||
137 | "mac.l %0, %1, %%acc" #num \ | ||
138 | : \ | ||
139 | : "%d" (op1), "d" (op2)\ | ||
140 | ) | ||
141 | #define MAC_OUT(dest, num) \ | ||
142 | asm volatile( \ | ||
143 | "movclr.l %%acc" #num ", %0" \ | ||
144 | : "=d" (dest) \ | ||
145 | ) | ||
146 | #elif defined(CPU_SH) | ||
147 | /* calculate the 32-bit product of signed 16-bit op1 and op2 */ | ||
148 | static inline int32_t mul_s16_s16(int16_t op1, int16_t op2) | ||
136 | { | 149 | { |
137 | /* sum is output value * src->width */ | 150 | return (int32_t)(op1 * op2); |
138 | SDEBUGF("scale_h_area_setup\n"); | ||
139 | ctx->divisor = ctx->src->width; | ||
140 | } | 151 | } |
141 | 152 | ||
153 | /* calculate the 32-bit product of unsigned 16-bit op1 and op2 */ | ||
154 | static inline uint32_t mul_u16_u16(uint16_t op1, uint16_t op2) | ||
155 | { | ||
156 | return (uint32_t)(op1 * op2); | ||
157 | } | ||
158 | #endif | ||
159 | |||
142 | /* horizontal area average scaler */ | 160 | /* horizontal area average scaler */ |
143 | static bool scale_h_area(void *out_line_ptr, | 161 | static bool scale_h_area(void *out_line_ptr, |
144 | struct scaler_context *ctx, bool accum) | 162 | struct scaler_context *ctx, bool accum) |
145 | { | 163 | { |
146 | SDEBUGF("scale_h_area\n"); | 164 | SDEBUGF("scale_h_area\n"); |
147 | unsigned int ix, ox, oxe, mul; | 165 | unsigned int ix, ox, oxe, mul; |
166 | #if defined(CPU_SH) || defined (TEST_SH_MATH) | ||
167 | const uint32_t h_i_val = ctx->src->width, | ||
168 | h_o_val = ctx->bm->width; | ||
169 | #else | ||
170 | const uint32_t h_i_val = ctx->h_i_val, | ||
171 | h_o_val = ctx->h_o_val; | ||
172 | #endif | ||
148 | #ifdef HAVE_LCD_COLOR | 173 | #ifdef HAVE_LCD_COLOR |
149 | struct uint32_rgb rgbvalacc = { 0, 0, 0 }, | 174 | struct uint32_rgb rgbvalacc = { 0, 0, 0 }, |
150 | rgbvaltmp = { 0, 0, 0 }, | 175 | rgbvaltmp = { 0, 0, 0 }, |
@@ -161,31 +186,57 @@ static bool scale_h_area(void *out_line_ptr, | |||
161 | yield(); | 186 | yield(); |
162 | for (ix = 0; ix < (unsigned int)ctx->src->width; ix++) | 187 | for (ix = 0; ix < (unsigned int)ctx->src->width; ix++) |
163 | { | 188 | { |
164 | oxe += ctx->bm->width; | 189 | oxe += h_o_val; |
165 | /* end of current area has been reached */ | 190 | /* end of current area has been reached */ |
166 | /* fill buffer if needed */ | 191 | /* fill buffer if needed */ |
167 | FILL_BUF(part,ctx->store_part,ctx->args); | 192 | FILL_BUF(part,ctx->store_part,ctx->args); |
168 | #ifdef HAVE_LCD_COLOR | 193 | #ifdef HAVE_LCD_COLOR |
169 | if (oxe >= (unsigned int)ctx->src->width) | 194 | if (oxe >= h_i_val) |
170 | { | 195 | { |
171 | /* "reset" error, which now represents partial coverage of next | 196 | /* "reset" error, which now represents partial coverage of next |
172 | pixel by the next area | 197 | pixel by the next area |
173 | */ | 198 | */ |
174 | oxe -= ctx->src->width; | 199 | oxe -= h_i_val; |
175 | 200 | ||
201 | #if defined(CPU_COLDFIRE) | ||
202 | /* Coldfire EMAC math */ | ||
176 | /* add saved partial pixel from start of area */ | 203 | /* add saved partial pixel from start of area */ |
177 | rgbvalacc.r = rgbvalacc.r * ctx->bm->width + rgbvaltmp.r * mul; | 204 | MAC(rgbvalacc.r, h_o_val, 0); |
178 | rgbvalacc.g = rgbvalacc.g * ctx->bm->width + rgbvaltmp.g * mul; | 205 | MAC(rgbvalacc.g, h_o_val, 1); |
179 | rgbvalacc.b = rgbvalacc.b * ctx->bm->width + rgbvaltmp.b * mul; | 206 | MAC(rgbvalacc.b, h_o_val, 2); |
207 | MAC(rgbvaltmp.r, mul, 0); | ||
208 | MAC(rgbvaltmp.g, mul, 1); | ||
209 | MAC(rgbvaltmp.b, mul, 2); | ||
210 | /* get new pixel , then add its partial coverage to this area */ | ||
211 | mul = h_o_val - oxe; | ||
212 | rgbvaltmp.r = part->buf->red; | ||
213 | rgbvaltmp.g = part->buf->green; | ||
214 | rgbvaltmp.b = part->buf->blue; | ||
215 | MAC(rgbvaltmp.r, mul, 0); | ||
216 | MAC(rgbvaltmp.g, mul, 1); | ||
217 | MAC(rgbvaltmp.b, mul, 2); | ||
218 | MAC_OUT(rgbvalacc.r, 0); | ||
219 | MAC_OUT(rgbvalacc.g, 1); | ||
220 | MAC_OUT(rgbvalacc.b, 2); | ||
221 | #else | ||
222 | /* generic C math */ | ||
223 | /* add saved partial pixel from start of area */ | ||
224 | rgbvalacc.r = rgbvalacc.r * h_o_val + rgbvaltmp.r * mul; | ||
225 | rgbvalacc.g = rgbvalacc.g * h_o_val + rgbvaltmp.g * mul; | ||
226 | rgbvalacc.b = rgbvalacc.b * h_o_val + rgbvaltmp.b * mul; | ||
180 | 227 | ||
181 | /* get new pixel , then add its partial coverage to this area */ | 228 | /* get new pixel , then add its partial coverage to this area */ |
182 | rgbvaltmp.r = part->buf->red; | 229 | rgbvaltmp.r = part->buf->red; |
183 | rgbvaltmp.g = part->buf->green; | 230 | rgbvaltmp.g = part->buf->green; |
184 | rgbvaltmp.b = part->buf->blue; | 231 | rgbvaltmp.b = part->buf->blue; |
185 | mul = ctx->bm->width - oxe; | 232 | mul = h_o_val - oxe; |
186 | rgbvalacc.r += rgbvaltmp.r * mul; | 233 | rgbvalacc.r += rgbvaltmp.r * mul; |
187 | rgbvalacc.g += rgbvaltmp.g * mul; | 234 | rgbvalacc.g += rgbvaltmp.g * mul; |
188 | rgbvalacc.b += rgbvaltmp.b * mul; | 235 | rgbvalacc.b += rgbvaltmp.b * mul; |
236 | #endif /* CPU */ | ||
237 | rgbvalacc.r = (rgbvalacc.r + (1 << 21)) >> 22; | ||
238 | rgbvalacc.g = (rgbvalacc.g + (1 << 21)) >> 22; | ||
239 | rgbvalacc.b = (rgbvalacc.b + (1 << 21)) >> 22; | ||
189 | /* store or accumulate to output row */ | 240 | /* store or accumulate to output row */ |
190 | if (accum) | 241 | if (accum) |
191 | { | 242 | { |
@@ -200,7 +251,7 @@ static bool scale_h_area(void *out_line_ptr, | |||
200 | rgbvalacc.r = 0; | 251 | rgbvalacc.r = 0; |
201 | rgbvalacc.g = 0; | 252 | rgbvalacc.g = 0; |
202 | rgbvalacc.b = 0; | 253 | rgbvalacc.b = 0; |
203 | mul = ctx->bm->width - mul; | 254 | mul = oxe; |
204 | ox += 1; | 255 | ox += 1; |
205 | /* inside an area */ | 256 | /* inside an area */ |
206 | } else { | 257 | } else { |
@@ -210,21 +261,45 @@ static bool scale_h_area(void *out_line_ptr, | |||
210 | rgbvalacc.b += part->buf->blue; | 261 | rgbvalacc.b += part->buf->blue; |
211 | } | 262 | } |
212 | #else | 263 | #else |
213 | if (oxe >= (unsigned int)ctx->src->width) | 264 | if (oxe >= h_i_val) |
214 | { | 265 | { |
215 | /* "reset" error, which now represents partial coverage of next | 266 | /* "reset" error, which now represents partial coverage of next |
216 | pixel by the next area | 267 | pixel by the next area |
217 | */ | 268 | */ |
218 | oxe -= ctx->src->width; | 269 | oxe -= h_i_val; |
270 | #if defined(CPU_COLDFIRE) | ||
271 | /* Coldfire EMAC math */ | ||
272 | /* add saved partial pixel from start of area */ | ||
273 | MAC(acc, h_o_val, 0); | ||
274 | MAC(tmp, mul, 0); | ||
275 | /* get new pixel , then add its partial coverage to this area */ | ||
276 | tmp = *(part->buf); | ||
277 | mul = h_o_val - oxe; | ||
278 | MAC(tmp, mul, 0); | ||
279 | MAC_OUT(acc, 0); | ||
280 | #elif defined(CPU_SH) | ||
281 | /* SH-1 16x16->32 math */ | ||
282 | /* add saved partial pixel from start of area */ | ||
283 | acc = mul_u16_u16(acc, h_o_val) + mul_u16_u16(tmp, mul); | ||
219 | 284 | ||
285 | /* get new pixel , then add its partial coverage to this area */ | ||
286 | tmp = *(part->buf); | ||
287 | mul = h_o_val - oxe; | ||
288 | acc += mul_u16_u16(tmp, mul); | ||
289 | #else | ||
290 | /* generic C math */ | ||
220 | /* add saved partial pixel from start of area */ | 291 | /* add saved partial pixel from start of area */ |
221 | acc = MULUQ(acc, ctx->bm->width) + MULUQ(tmp, mul); | 292 | acc = (acc * h_o_val) + (tmp * mul); |
222 | 293 | ||
223 | /* get new pixel , then add its partial coverage to this area */ | 294 | /* get new pixel , then add its partial coverage to this area */ |
224 | tmp = *(part->buf); | 295 | tmp = *(part->buf); |
225 | mul = ctx->bm->width - oxe; | 296 | mul = h_o_val - oxe; |
226 | acc += MULUQ(tmp, mul); | 297 | acc += tmp * mul; |
298 | #endif /* CPU */ | ||
299 | #if !(defined(CPU_SH) || defined(TEST_SH_MATH)) | ||
227 | /* round, divide, and either store or accumulate to output row */ | 300 | /* round, divide, and either store or accumulate to output row */ |
301 | acc = (acc + (1 << 21)) >> 22; | ||
302 | #endif | ||
228 | if (accum) | 303 | if (accum) |
229 | { | 304 | { |
230 | acc += out_line[ox]; | 305 | acc += out_line[ox]; |
@@ -232,7 +307,7 @@ static bool scale_h_area(void *out_line_ptr, | |||
232 | out_line[ox] = acc; | 307 | out_line[ox] = acc; |
233 | /* reset accumulator */ | 308 | /* reset accumulator */ |
234 | acc = 0; | 309 | acc = 0; |
235 | mul = ctx->bm->width - mul; | 310 | mul = oxe; |
236 | ox += 1; | 311 | ox += 1; |
237 | /* inside an area */ | 312 | /* inside an area */ |
238 | } else { | 313 | } else { |
@@ -249,56 +324,56 @@ static bool scale_h_area(void *out_line_ptr, | |||
249 | /* vertical area average scaler */ | 324 | /* vertical area average scaler */ |
250 | static inline bool scale_v_area(struct rowset *rset, struct scaler_context *ctx) | 325 | static inline bool scale_v_area(struct rowset *rset, struct scaler_context *ctx) |
251 | { | 326 | { |
252 | uint32_t mul, x, oy, iy, oye; | 327 | uint32_t mul, oy, iy, oye; |
328 | #if defined(CPU_SH) || defined (TEST_SH_MATH) | ||
329 | const uint32_t v_i_val = ctx->src->height, | ||
330 | v_o_val = ctx->bm->height; | ||
331 | #else | ||
332 | const uint32_t v_i_val = ctx->v_i_val, | ||
333 | v_o_val = ctx->v_o_val; | ||
334 | #endif | ||
253 | 335 | ||
254 | /* Set up rounding and scale factors */ | 336 | /* Set up rounding and scale factors */ |
255 | ctx->divisor *= ctx->src->height; | ||
256 | ctx->round = ctx->divisor >> 1; | ||
257 | ctx->divisor = 1 + (-((ctx->divisor + 1) >> 1)) / ctx->divisor; | ||
258 | mul = 0; | 337 | mul = 0; |
259 | oy = rset->rowstart; | 338 | oy = rset->rowstart; |
260 | oye = 0; | 339 | oye = 0; |
261 | #ifdef HAVE_LCD_COLOR | 340 | #ifdef HAVE_LCD_COLOR |
262 | uint32_t *rowacc = (uint32_t *) ctx->buf, | 341 | uint32_t *rowacc = (uint32_t *) ctx->buf, |
263 | *rowtmp = rowacc + 3 * ctx->bm->width; | 342 | *rowtmp = rowacc + 3 * ctx->bm->width, |
343 | *rowacc_px, *rowtmp_px; | ||
264 | memset((void *)ctx->buf, 0, ctx->bm->width * 2 * sizeof(struct uint32_rgb)); | 344 | memset((void *)ctx->buf, 0, ctx->bm->width * 2 * sizeof(struct uint32_rgb)); |
265 | #else | 345 | #else |
266 | uint32_t *rowacc = (uint32_t *) ctx->buf, | 346 | uint32_t *rowacc = (uint32_t *) ctx->buf, |
267 | *rowtmp = rowacc + ctx->bm->width; | 347 | *rowtmp = rowacc + ctx->bm->width, |
348 | *rowacc_px, *rowtmp_px; | ||
268 | memset((void *)ctx->buf, 0, ctx->bm->width * 2 * sizeof(uint32_t)); | 349 | memset((void *)ctx->buf, 0, ctx->bm->width * 2 * sizeof(uint32_t)); |
269 | #endif | 350 | #endif |
270 | SDEBUGF("scale_v_area\n"); | 351 | SDEBUGF("scale_v_area\n"); |
271 | /* zero the accumulator and temp rows */ | 352 | /* zero the accumulator and temp rows */ |
272 | for (iy = 0; iy < (unsigned int)ctx->src->height; iy++) | 353 | for (iy = 0; iy < (unsigned int)ctx->src->height; iy++) |
273 | { | 354 | { |
274 | oye += ctx->bm->height; | 355 | oye += v_o_val; |
275 | /* end of current area has been reached */ | 356 | /* end of current area has been reached */ |
276 | if (oye >= (unsigned int)ctx->src->height) | 357 | if (oye >= v_i_val) |
277 | { | 358 | { |
278 | /* "reset" error, which now represents partial coverage of the next | 359 | /* "reset" error, which now represents partial coverage of the next |
279 | row by the next area | 360 | row by the next area |
280 | */ | 361 | */ |
281 | oye -= ctx->src->height; | 362 | oye -= v_i_val; |
282 | /* add stored partial row to accumulator */ | 363 | /* add stored partial row to accumulator */ |
283 | #ifdef HAVE_LCD_COLOR | 364 | for(rowacc_px = rowacc, rowtmp_px = rowtmp; rowacc_px != rowtmp; |
284 | for (x = 0; x < 3 * (unsigned int)ctx->bm->width; x++) | 365 | rowacc_px++, rowtmp_px++) |
285 | #else | 366 | *rowacc_px = *rowacc_px * v_o_val + *rowtmp_px * mul; |
286 | for (x = 0; x < (unsigned int)ctx->bm->width; x++) | ||
287 | #endif | ||
288 | rowacc[x] = rowacc[x] * ctx->bm->height + mul * rowtmp[x]; | ||
289 | /* store new scaled row in temp row */ | 367 | /* store new scaled row in temp row */ |
290 | if(!ctx->h_scaler(rowtmp, ctx, false)) | 368 | if(!ctx->h_scaler(rowtmp, ctx, false)) |
291 | return false; | 369 | return false; |
292 | /* add partial coverage by new row to this area, then round and | 370 | /* add partial coverage by new row to this area, then round and |
293 | scale to final value | 371 | scale to final value |
294 | */ | 372 | */ |
295 | mul = ctx->bm->height - oye; | 373 | mul = v_o_val - oye; |
296 | #ifdef HAVE_LCD_COLOR | 374 | for(rowacc_px = rowacc, rowtmp_px = rowtmp; rowacc_px != rowtmp; |
297 | for (x = 0; x < 3 * (unsigned int)ctx->bm->width; x++) | 375 | rowacc_px++, rowtmp_px++) |
298 | #else | 376 | *rowacc_px += mul * *rowtmp_px; |
299 | for (x = 0; x < (unsigned int)ctx->bm->width; x++) | ||
300 | #endif | ||
301 | rowacc[x] += mul * rowtmp[x]; | ||
302 | ctx->output_row(oy, (void*)rowacc, ctx); | 377 | ctx->output_row(oy, (void*)rowacc, ctx); |
303 | /* clear accumulator row, store partial coverage for next row */ | 378 | /* clear accumulator row, store partial coverage for next row */ |
304 | #ifdef HAVE_LCD_COLOR | 379 | #ifdef HAVE_LCD_COLOR |
@@ -319,20 +394,18 @@ static inline bool scale_v_area(struct rowset *rset, struct scaler_context *ctx) | |||
319 | } | 394 | } |
320 | 395 | ||
321 | #ifdef HAVE_UPSCALER | 396 | #ifdef HAVE_UPSCALER |
322 | /* Set up rounding and scale factors for the horizontal scaler. The divisor | ||
323 | is bm->width - 1, so that the first and last pixels in the row align | ||
324 | exactly between input and output | ||
325 | */ | ||
326 | static inline void scale_h_linear_setup(struct scaler_context *ctx) | ||
327 | { | ||
328 | ctx->divisor = ctx->bm->width - 1; | ||
329 | } | ||
330 | |||
331 | /* horizontal linear scaler */ | 397 | /* horizontal linear scaler */ |
332 | static bool scale_h_linear(void *out_line_ptr, struct scaler_context *ctx, | 398 | static bool scale_h_linear(void *out_line_ptr, struct scaler_context *ctx, |
333 | bool accum) | 399 | bool accum) |
334 | { | 400 | { |
335 | unsigned int ix, ox, ixe; | 401 | unsigned int ix, ox, ixe; |
402 | #if defined(CPU_SH) || defined (TEST_SH_MATH) | ||
403 | const uint32_t h_i_val = ctx->src->width - 1, | ||
404 | h_o_val = ctx->bm->width - 1; | ||
405 | #else | ||
406 | const uint32_t h_i_val = ctx->h_i_val, | ||
407 | h_o_val = ctx->h_o_val; | ||
408 | #endif | ||
336 | /* type x = x is an ugly hack for hiding an uninitialized data warning. The | 409 | /* type x = x is an ugly hack for hiding an uninitialized data warning. The |
337 | values are conditionally initialized before use, but other values are | 410 | values are conditionally initialized before use, but other values are |
338 | set such that this will occur before these are used. | 411 | set such that this will occur before these are used. |
@@ -348,27 +421,35 @@ static bool scale_h_linear(void *out_line_ptr, struct scaler_context *ctx, | |||
348 | FILL_BUF_INIT(part,ctx->store_part,ctx->args); | 421 | FILL_BUF_INIT(part,ctx->store_part,ctx->args); |
349 | ix = 0; | 422 | ix = 0; |
350 | /* The error is set so that values are initialized on the first pass. */ | 423 | /* The error is set so that values are initialized on the first pass. */ |
351 | ixe = ctx->bm->width - 1; | 424 | ixe = h_o_val; |
352 | /* give other tasks a chance to run */ | 425 | /* give other tasks a chance to run */ |
353 | yield(); | 426 | yield(); |
354 | for (ox = 0; ox < (uint32_t)ctx->bm->width; ox++) | 427 | for (ox = 0; ox < (uint32_t)ctx->bm->width; ox++) |
355 | { | 428 | { |
356 | #ifdef HAVE_LCD_COLOR | 429 | #ifdef HAVE_LCD_COLOR |
357 | if (ixe >= ((uint32_t)ctx->bm->width - 1)) | 430 | if (ixe >= h_o_val) |
358 | { | 431 | { |
359 | /* Store the new "current" pixel value in rgbval, and the color | 432 | /* Store the new "current" pixel value in rgbval, and the color |
360 | step value in rgbinc. | 433 | step value in rgbinc. |
361 | */ | 434 | */ |
362 | ixe -= (ctx->bm->width - 1); | 435 | ixe -= h_o_val; |
363 | rgbinc.r = -(part->buf->red); | 436 | rgbinc.r = -(part->buf->red); |
364 | rgbinc.g = -(part->buf->green); | 437 | rgbinc.g = -(part->buf->green); |
365 | rgbinc.b = -(part->buf->blue); | 438 | rgbinc.b = -(part->buf->blue); |
366 | rgbval.r = (part->buf->red) * (ctx->bm->width - 1); | 439 | #if defined(CPU_COLDFIRE) |
367 | rgbval.g = (part->buf->green) * (ctx->bm->width - 1); | 440 | /* Coldfire EMAC math */ |
368 | rgbval.b = (part->buf->blue) * (ctx->bm->width - 1); | 441 | MAC(part->buf->red, h_o_val, 0); |
442 | MAC(part->buf->green, h_o_val, 1); | ||
443 | MAC(part->buf->blue, h_o_val, 2); | ||
444 | #else | ||
445 | /* generic C math */ | ||
446 | rgbval.r = (part->buf->red) * h_o_val; | ||
447 | rgbval.g = (part->buf->green) * h_o_val; | ||
448 | rgbval.b = (part->buf->blue) * h_o_val; | ||
449 | #endif /* CPU */ | ||
369 | ix += 1; | 450 | ix += 1; |
370 | /* If this wasn't the last pixel, add the next one to rgbinc. */ | 451 | /* If this wasn't the last pixel, add the next one to rgbinc. */ |
371 | if (ix < (uint32_t)ctx->src->width) { | 452 | if (LIKELY(ix < (uint32_t)ctx->src->width)) { |
372 | part->buf++; | 453 | part->buf++; |
373 | part->len--; | 454 | part->len--; |
374 | /* Fetch new pixels if needed */ | 455 | /* Fetch new pixels if needed */ |
@@ -379,14 +460,28 @@ static bool scale_h_linear(void *out_line_ptr, struct scaler_context *ctx, | |||
379 | /* Add a partial step to rgbval, if this pixel isn't precisely | 460 | /* Add a partial step to rgbval, if this pixel isn't precisely |
380 | aligned with the new source pixel | 461 | aligned with the new source pixel |
381 | */ | 462 | */ |
463 | #if defined(CPU_COLDFIRE) | ||
464 | /* Coldfire EMAC math */ | ||
465 | MAC(rgbinc.r, ixe, 0); | ||
466 | MAC(rgbinc.g, ixe, 1); | ||
467 | MAC(rgbinc.b, ixe, 2); | ||
468 | #else | ||
469 | /* generic C math */ | ||
382 | rgbval.r += rgbinc.r * ixe; | 470 | rgbval.r += rgbinc.r * ixe; |
383 | rgbval.g += rgbinc.g * ixe; | 471 | rgbval.g += rgbinc.g * ixe; |
384 | rgbval.b += rgbinc.b * ixe; | 472 | rgbval.b += rgbinc.b * ixe; |
473 | #endif | ||
385 | } | 474 | } |
386 | /* Now multiple the color increment to its proper value */ | 475 | #if defined(CPU_COLDFIRE) |
387 | rgbinc.r *= ctx->src->width - 1; | 476 | /* get final EMAC result out of ACC registers */ |
388 | rgbinc.g *= ctx->src->width - 1; | 477 | MAC_OUT(rgbval.r, 0); |
389 | rgbinc.b *= ctx->src->width - 1; | 478 | MAC_OUT(rgbval.g, 1); |
479 | MAC_OUT(rgbval.b, 2); | ||
480 | #endif | ||
481 | /* Now multiply the color increment to its proper value */ | ||
482 | rgbinc.r *= h_i_val; | ||
483 | rgbinc.g *= h_i_val; | ||
484 | rgbinc.b *= h_i_val; | ||
390 | } else { | 485 | } else { |
391 | rgbval.r += rgbinc.r; | 486 | rgbval.r += rgbinc.r; |
392 | rgbval.g += rgbinc.g; | 487 | rgbval.g += rgbinc.g; |
@@ -395,27 +490,36 @@ static bool scale_h_linear(void *out_line_ptr, struct scaler_context *ctx, | |||
395 | /* round and scale values, and accumulate or store to output */ | 490 | /* round and scale values, and accumulate or store to output */ |
396 | if (accum) | 491 | if (accum) |
397 | { | 492 | { |
398 | out_line[ox].r += rgbval.r; | 493 | out_line[ox].r += (rgbval.r + (1 << 21)) >> 22; |
399 | out_line[ox].g += rgbval.g; | 494 | out_line[ox].g += (rgbval.g + (1 << 21)) >> 22; |
400 | out_line[ox].b += rgbval.b; | 495 | out_line[ox].b += (rgbval.b + (1 << 21)) >> 22; |
401 | } else { | 496 | } else { |
402 | out_line[ox].r = rgbval.r; | 497 | out_line[ox].r = (rgbval.r + (1 << 21)) >> 22; |
403 | out_line[ox].g = rgbval.g; | 498 | out_line[ox].g = (rgbval.g + (1 << 21)) >> 22; |
404 | out_line[ox].b = rgbval.b; | 499 | out_line[ox].b = (rgbval.b + (1 << 21)) >> 22; |
405 | } | 500 | } |
406 | #else | 501 | #else |
407 | if (ixe >= ((uint32_t)ctx->bm->width - 1)) | 502 | if (ixe >= h_o_val) |
408 | { | 503 | { |
409 | /* Store the new "current" pixel value in rgbval, and the color | 504 | /* Store the new "current" pixel value in rgbval, and the color |
410 | step value in rgbinc. | 505 | step value in rgbinc. |
411 | */ | 506 | */ |
412 | ixe -= (ctx->bm->width - 1); | 507 | ixe -= h_o_val; |
413 | val = *(part->buf); | 508 | val = *(part->buf); |
414 | inc = -val; | 509 | inc = -val; |
415 | val = MULUQ(val, ctx->bm->width - 1); | 510 | #if defined(CPU_COLDFIRE) |
511 | /* Coldfire EMAC math */ | ||
512 | MAC(val, h_o_val, 0); | ||
513 | #elif defined(CPU_SH) | ||
514 | /* SH-1 16x16->32 math */ | ||
515 | val = mul_u16_u16(val, h_o_val); | ||
516 | #else | ||
517 | /* generic C math */ | ||
518 | val = val * h_o_val; | ||
519 | #endif | ||
416 | ix += 1; | 520 | ix += 1; |
417 | /* If this wasn't the last pixel, add the next one to rgbinc. */ | 521 | /* If this wasn't the last pixel, add the next one to rgbinc. */ |
418 | if (ix < (uint32_t)ctx->src->width) { | 522 | if (LIKELY(ix < (uint32_t)ctx->src->width)) { |
419 | part->buf++; | 523 | part->buf++; |
420 | part->len--; | 524 | part->len--; |
421 | /* Fetch new pixels if needed */ | 525 | /* Fetch new pixels if needed */ |
@@ -424,12 +528,40 @@ static bool scale_h_linear(void *out_line_ptr, struct scaler_context *ctx, | |||
424 | /* Add a partial step to rgbval, if this pixel isn't precisely | 528 | /* Add a partial step to rgbval, if this pixel isn't precisely |
425 | aligned with the new source pixel | 529 | aligned with the new source pixel |
426 | */ | 530 | */ |
427 | val += MULQ(inc, ixe); | 531 | #if defined(CPU_COLDFIRE) |
532 | /* Coldfire EMAC math */ | ||
533 | MAC(inc, ixe, 0); | ||
534 | #elif defined(CPU_SH) | ||
535 | /* SH-1 16x16->32 math */ | ||
536 | val += mul_s16_s16(inc, ixe); | ||
537 | #else | ||
538 | /* generic C math */ | ||
539 | val += inc * ixe; | ||
540 | #endif | ||
428 | } | 541 | } |
542 | #if defined(CPU_COLDFIRE) | ||
543 | /* get final EMAC result out of ACC register */ | ||
544 | MAC_OUT(val, 0); | ||
545 | #endif | ||
429 | /* Now multiply the color increment to its proper value */ | 546 | /* Now multiply the color increment to its proper value */ |
430 | inc = MULQ(inc, ctx->src->width - 1); | 547 | #if defined(CPU_SH) |
548 | /* SH-1 16x16->32 math */ | ||
549 | inc = mul_s16_s16(inc, h_i_val); | ||
550 | #else | ||
551 | /* generic C math */ | ||
552 | inc *= h_i_val; | ||
553 | #endif | ||
431 | } else | 554 | } else |
432 | val += inc; | 555 | val += inc; |
556 | #if !(defined(CPU_SH) || defined(TEST_SH_MATH)) | ||
557 | /* round and scale values, and accumulate or store to output */ | ||
558 | if (accum) | ||
559 | { | ||
560 | out_line[ox] += (val + (1 << 21)) >> 22; | ||
561 | } else { | ||
562 | out_line[ox] = (val + (1 << 21)) >> 22; | ||
563 | } | ||
564 | #else | ||
433 | /* round and scale values, and accumulate or store to output */ | 565 | /* round and scale values, and accumulate or store to output */ |
434 | if (accum) | 566 | if (accum) |
435 | { | 567 | { |
@@ -438,7 +570,8 @@ static bool scale_h_linear(void *out_line_ptr, struct scaler_context *ctx, | |||
438 | out_line[ox] = val; | 570 | out_line[ox] = val; |
439 | } | 571 | } |
440 | #endif | 572 | #endif |
441 | ixe += ctx->src->width - 1; | 573 | #endif |
574 | ixe += h_i_val; | ||
442 | } | 575 | } |
443 | return true; | 576 | return true; |
444 | } | 577 | } |
@@ -447,71 +580,66 @@ static bool scale_h_linear(void *out_line_ptr, struct scaler_context *ctx, | |||
447 | static inline bool scale_v_linear(struct rowset *rset, | 580 | static inline bool scale_v_linear(struct rowset *rset, |
448 | struct scaler_context *ctx) | 581 | struct scaler_context *ctx) |
449 | { | 582 | { |
450 | uint32_t mul, x, iy, iye; | 583 | uint32_t mul, iy, iye; |
451 | int32_t oy; | 584 | int32_t oy; |
452 | /* Set up scale and rounding factors, the divisor is bm->height - 1 */ | 585 | #if defined(CPU_SH) || defined (TEST_SH_MATH) |
453 | ctx->divisor *= (ctx->bm->height - 1); | 586 | const uint32_t v_i_val = ctx->src->height - 1, |
454 | ctx->round = ctx->divisor >> 1; | 587 | v_o_val = ctx->bm->height - 1; |
455 | ctx->divisor = 1 + (-((ctx->divisor + 1) >> 1)) / ctx->divisor; | 588 | #else |
456 | /* Set up our two temp buffers. The names are generic because they'll be | 589 | const uint32_t v_i_val = ctx->v_i_val, |
457 | swapped each time a new input row is read | 590 | v_o_val = ctx->v_o_val; |
591 | #endif | ||
592 | /* Set up our buffers, to store the increment and current value for each | ||
593 | column, and one temp buffer used to read in new rows. | ||
458 | */ | 594 | */ |
459 | #ifdef HAVE_LCD_COLOR | 595 | #ifdef HAVE_LCD_COLOR |
460 | uint32_t *rowinc = (uint32_t *)(ctx->buf), | 596 | uint32_t *rowinc = (uint32_t *)(ctx->buf), |
461 | *rowval = rowinc + 3 * ctx->bm->width, | 597 | *rowval = rowinc + 3 * ctx->bm->width, |
462 | *rowtmp = rowval + 3 * ctx->bm->width; | 598 | *rowtmp = rowval + 3 * ctx->bm->width, |
463 | #else | 599 | #else |
464 | uint32_t *rowinc = (uint32_t *)(ctx->buf), | 600 | uint32_t *rowinc = (uint32_t *)(ctx->buf), |
465 | *rowval = rowinc + ctx->bm->width, | 601 | *rowval = rowinc + ctx->bm->width, |
466 | *rowtmp = rowval + ctx->bm->width; | 602 | *rowtmp = rowval + ctx->bm->width, |
467 | #endif | 603 | #endif |
604 | *rowinc_px, *rowval_px, *rowtmp_px; | ||
468 | 605 | ||
469 | SDEBUGF("scale_v_linear\n"); | 606 | SDEBUGF("scale_v_linear\n"); |
470 | mul = 0; | 607 | mul = 0; |
471 | iy = 0; | 608 | iy = 0; |
472 | iye = ctx->bm->height - 1; | 609 | iye = v_o_val; |
473 | /* get first scaled row in rowtmp */ | 610 | /* get first scaled row in rowtmp */ |
474 | if(!ctx->h_scaler((void*)rowtmp, ctx, false)) | 611 | if(!ctx->h_scaler((void*)rowtmp, ctx, false)) |
475 | return false; | 612 | return false; |
476 | for (oy = rset->rowstart; oy != rset->rowstop; oy += rset->rowstep) | 613 | for (oy = rset->rowstart; oy != rset->rowstop; oy += rset->rowstep) |
477 | { | 614 | { |
478 | if (iye >= (uint32_t)ctx->bm->height - 1) | 615 | if (iye >= v_o_val) |
479 | { | 616 | { |
480 | iye -= ctx->bm->height - 1; | 617 | iye -= v_o_val; |
481 | iy += 1; | 618 | iy += 1; |
482 | #ifdef HAVE_LCD_COLOR | 619 | for(rowinc_px = rowinc, rowtmp_px = rowtmp, rowval_px = rowval; |
483 | for (x = 0; x < 3 * (uint32_t)ctx->bm->width; x++) | 620 | rowinc_px < rowval; rowinc_px++, rowtmp_px++, rowval_px++) |
484 | #else | ||
485 | for (x = 0; x < (uint32_t)ctx->bm->width; x++) | ||
486 | #endif | ||
487 | { | 621 | { |
488 | rowinc[x] = -rowtmp[x]; | 622 | *rowinc_px = -*rowtmp_px; |
489 | rowval[x] = rowtmp[x] * (ctx->bm->height - 1); | 623 | *rowval_px = *rowtmp_px * v_o_val; |
490 | } | 624 | } |
491 | if (iy < (uint32_t)ctx->src->height) | 625 | if (iy < (uint32_t)ctx->src->height) |
492 | { | 626 | { |
493 | if (!ctx->h_scaler((void*)rowtmp, ctx, false)) | 627 | if (!ctx->h_scaler((void*)rowtmp, ctx, false)) |
494 | return false; | 628 | return false; |
495 | #ifdef HAVE_LCD_COLOR | 629 | for(rowinc_px = rowinc, rowtmp_px = rowtmp, rowval_px = rowval; |
496 | for (x = 0; x < 3 * (uint32_t)ctx->bm->width; x++) | 630 | rowinc_px < rowval; rowinc_px++, rowtmp_px++, rowval_px++) |
497 | #else | ||
498 | for (x = 0; x < (uint32_t)ctx->bm->width; x++) | ||
499 | #endif | ||
500 | { | 631 | { |
501 | rowinc[x] += rowtmp[x]; | 632 | *rowinc_px += *rowtmp_px; |
502 | rowval[x] += rowinc[x] * iye; | 633 | *rowval_px += *rowinc_px * iye; |
503 | rowinc[x] *= ctx->src->height - 1; | 634 | *rowinc_px *= v_i_val; |
504 | } | 635 | } |
505 | } | 636 | } |
506 | } else | 637 | } else |
507 | #ifdef HAVE_LCD_COLOR | 638 | for(rowinc_px = rowinc, rowval_px = rowval; rowinc_px < rowval; |
508 | for (x = 0; x < 3 * (uint32_t)ctx->bm->width; x++) | 639 | rowinc_px++, rowval_px++) |
509 | #else | 640 | *rowval_px += *rowinc_px; |
510 | for (x = 0; x < (uint32_t)ctx->bm->width; x++) | ||
511 | #endif | ||
512 | rowval[x] += rowinc[x]; | ||
513 | ctx->output_row(oy, (void*)rowval, ctx); | 641 | ctx->output_row(oy, (void*)rowval, ctx); |
514 | iye += ctx->src->height - 1; | 642 | iye += v_i_val; |
515 | } | 643 | } |
516 | return true; | 644 | return true; |
517 | } | 645 | } |
@@ -533,9 +661,9 @@ static void output_row_32_native_fromyuv(uint32_t row, void * row_in, | |||
533 | for (col = 0; col < ctx->bm->width; col++) { | 661 | for (col = 0; col < ctx->bm->width; col++) { |
534 | if (ctx->dither) | 662 | if (ctx->dither) |
535 | delta = DITHERXDY(col,dy); | 663 | delta = DITHERXDY(col,dy); |
536 | y = SC_MUL(qp->b + ctx->round, ctx->divisor); | 664 | y = SC_OUT(qp->b, ctx); |
537 | u = SC_MUL(qp->g + ctx->round, ctx->divisor); | 665 | u = SC_OUT(qp->g, ctx); |
538 | v = SC_MUL(qp->r + ctx->round, ctx->divisor); | 666 | v = SC_OUT(qp->r, ctx); |
539 | qp++; | 667 | qp++; |
540 | yuv_to_rgb(y, u, v, &r, &g, &b); | 668 | yuv_to_rgb(y, u, v, &r, &g, &b); |
541 | r = (31 * r + (r >> 3) + delta) >> 8; | 669 | r = (31 * r + (r >> 3) + delta) >> 8; |
@@ -571,7 +699,7 @@ static void output_row_32_native(uint32_t row, void * row_in, | |||
571 | for (col = 0; col < ctx->bm->width; col++) { | 699 | for (col = 0; col < ctx->bm->width; col++) { |
572 | if (ctx->dither) | 700 | if (ctx->dither) |
573 | delta = DITHERXDY(col,dy); | 701 | delta = DITHERXDY(col,dy); |
574 | bright = SC_MUL((*qp++) + ctx->round,ctx->divisor); | 702 | bright = SC_OUT(*qp++, ctx); |
575 | bright = (3 * bright + (bright >> 6) + delta) >> 8; | 703 | bright = (3 * bright + (bright >> 6) + delta) >> 8; |
576 | data |= (~bright & 3) << shift; | 704 | data |= (~bright & 3) << shift; |
577 | shift -= 2; | 705 | shift -= 2; |
@@ -594,7 +722,7 @@ static void output_row_32_native(uint32_t row, void * row_in, | |||
594 | for (col = 0; col < ctx->bm->width; col++) { | 722 | for (col = 0; col < ctx->bm->width; col++) { |
595 | if (ctx->dither) | 723 | if (ctx->dither) |
596 | delta = DITHERXDY(col,dy); | 724 | delta = DITHERXDY(col,dy); |
597 | bright = SC_MUL((*qp++) + ctx->round, ctx->divisor); | 725 | bright = SC_OUT(*qp++, ctx); |
598 | bright = (3 * bright + (bright >> 6) + delta) >> 8; | 726 | bright = (3 * bright + (bright >> 6) + delta) >> 8; |
599 | *dest++ |= (~bright & 3) << shift; | 727 | *dest++ |= (~bright & 3) << shift; |
600 | } | 728 | } |
@@ -609,7 +737,7 @@ static void output_row_32_native(uint32_t row, void * row_in, | |||
609 | for (col = 0; col < ctx->bm->width; col++) { | 737 | for (col = 0; col < ctx->bm->width; col++) { |
610 | if (ctx->dither) | 738 | if (ctx->dither) |
611 | delta = DITHERXDY(col,dy); | 739 | delta = DITHERXDY(col,dy); |
612 | bright = SC_MUL((*qp++) + ctx->round, ctx->divisor); | 740 | bright = SC_OUT(*qp++, ctx); |
613 | bright = (3 * bright + (bright >> 6) + delta) >> 8; | 741 | bright = (3 * bright + (bright >> 6) + delta) >> 8; |
614 | *dest++ |= vi_pattern[bright] << shift; | 742 | *dest++ |= vi_pattern[bright] << shift; |
615 | } | 743 | } |
@@ -625,9 +753,9 @@ static void output_row_32_native(uint32_t row, void * row_in, | |||
625 | if (ctx->dither) | 753 | if (ctx->dither) |
626 | delta = DITHERXDY(col,dy); | 754 | delta = DITHERXDY(col,dy); |
627 | q0 = *qp++; | 755 | q0 = *qp++; |
628 | r = SC_MUL(q0.r + ctx->round, ctx->divisor); | 756 | r = SC_OUT(q0.r, ctx); |
629 | g = SC_MUL(q0.g + ctx->round, ctx->divisor); | 757 | g = SC_OUT(q0.g, ctx); |
630 | b = SC_MUL(q0.b + ctx->round, ctx->divisor); | 758 | b = SC_OUT(q0.b, ctx); |
631 | r = (31 * r + (r >> 3) + delta) >> 8; | 759 | r = (31 * r + (r >> 3) + delta) >> 8; |
632 | g = (63 * g + (g >> 2) + delta) >> 8; | 760 | g = (63 * g + (g >> 2) + delta) >> 8; |
633 | b = (31 * b + (b >> 3) + delta) >> 8; | 761 | b = (31 * b + (b >> 3) + delta) >> 8; |
@@ -664,13 +792,10 @@ int resize_on_load(struct bitmap *bm, bool dither, struct dim *src, | |||
664 | struct img_part* (*store_part)(void *args), | 792 | struct img_part* (*store_part)(void *args), |
665 | void *args) | 793 | void *args) |
666 | { | 794 | { |
667 | |||
668 | #ifdef HAVE_UPSCALER | ||
669 | const int sw = src->width; | 795 | const int sw = src->width; |
670 | const int sh = src->height; | 796 | const int sh = src->height; |
671 | const int dw = bm->width; | 797 | const int dw = bm->width; |
672 | const int dh = bm->height; | 798 | const int dh = bm->height; |
673 | #endif | ||
674 | int ret; | 799 | int ret; |
675 | #ifdef HAVE_LCD_COLOR | 800 | #ifdef HAVE_LCD_COLOR |
676 | unsigned int needed = sizeof(struct uint32_rgb) * 3 * bm->width; | 801 | unsigned int needed = sizeof(struct uint32_rgb) * 3 * bm->width; |
@@ -721,6 +846,9 @@ int resize_on_load(struct bitmap *bm, bool dither, struct dim *src, | |||
721 | ctx.bm = bm; | 846 | ctx.bm = bm; |
722 | ctx.src = src; | 847 | ctx.src = src; |
723 | ctx.dither = dither; | 848 | ctx.dither = dither; |
849 | #if defined(CPU_SH) || defined (TEST_SH_MATH) | ||
850 | uint32_t div; | ||
851 | #endif | ||
724 | #if !defined(PLUGIN) | 852 | #if !defined(PLUGIN) |
725 | #if defined(HAVE_LCD_COLOR) && defined(HAVE_JPEG) | 853 | #if defined(HAVE_LCD_COLOR) && defined(HAVE_JPEG) |
726 | ctx.output_row = format_index ? output_row_32_native_fromyuv | 854 | ctx.output_row = format_index ? output_row_32_native_fromyuv |
@@ -740,23 +868,56 @@ int resize_on_load(struct bitmap *bm, bool dither, struct dim *src, | |||
740 | { | 868 | { |
741 | #endif | 869 | #endif |
742 | ctx.h_scaler = scale_h_area; | 870 | ctx.h_scaler = scale_h_area; |
743 | scale_h_area_setup(&ctx); | 871 | #if defined(CPU_SH) || defined (TEST_SH_MATH) |
872 | div = sw; | ||
873 | #else | ||
874 | uint32_t h_div = (1U << 24) / sw; | ||
875 | ctx.h_i_val = sw * h_div; | ||
876 | ctx.h_o_val = dw * h_div; | ||
877 | #endif | ||
744 | #ifdef HAVE_UPSCALER | 878 | #ifdef HAVE_UPSCALER |
745 | } else { | 879 | } else { |
746 | ctx.h_scaler = scale_h_linear; | 880 | ctx.h_scaler = scale_h_linear; |
747 | scale_h_linear_setup(&ctx); | 881 | #if defined(CPU_SH) || defined (TEST_SH_MATH) |
882 | div = dw - 1; | ||
883 | #else | ||
884 | uint32_t h_div = (1U << 24) / (dw - 1); | ||
885 | ctx.h_i_val = (sw - 1) * h_div; | ||
886 | ctx.h_o_val = (dw - 1) * h_div; | ||
887 | #endif | ||
748 | } | 888 | } |
749 | #endif | 889 | #endif |
750 | SC_MUL_INIT; | 890 | #ifdef CPU_COLDFIRE |
891 | coldfire_set_macsr(EMAC_UNSIGNED); | ||
892 | #endif | ||
751 | #ifdef HAVE_UPSCALER | 893 | #ifdef HAVE_UPSCALER |
752 | if (sh > dh) | 894 | if (sh > dh) |
753 | #endif | 895 | #endif |
896 | { | ||
897 | #if defined(CPU_SH) || defined (TEST_SH_MATH) | ||
898 | div *= sh; | ||
899 | ctx.recip = ((uint32_t)(-div)) / div + 1; | ||
900 | #else | ||
901 | uint32_t v_div = (1U << 22) / sh; | ||
902 | ctx.v_i_val = sh * v_div; | ||
903 | ctx.v_o_val = dh * v_div; | ||
904 | #endif | ||
754 | ret = scale_v_area(rset, &ctx); | 905 | ret = scale_v_area(rset, &ctx); |
906 | } | ||
755 | #ifdef HAVE_UPSCALER | 907 | #ifdef HAVE_UPSCALER |
756 | else | 908 | else |
909 | { | ||
910 | #if defined(CPU_SH) || defined (TEST_SH_MATH) | ||
911 | div *= dh - 1; | ||
912 | ctx.recip = ((uint32_t)(-div)) / div + 1; | ||
913 | #else | ||
914 | uint32_t v_div = (1U << 22) / dh; | ||
915 | ctx.v_i_val = (sh - 1) * v_div; | ||
916 | ctx.v_o_val = (dh - 1) * v_div; | ||
917 | #endif | ||
757 | ret = scale_v_linear(rset, &ctx); | 918 | ret = scale_v_linear(rset, &ctx); |
919 | } | ||
758 | #endif | 920 | #endif |
759 | SC_MUL_END; | ||
760 | #ifdef HAVE_ADJUSTABLE_CPU_FREQ | 921 | #ifdef HAVE_ADJUSTABLE_CPU_FREQ |
761 | cpu_boost(false); | 922 | cpu_boost(false); |
762 | #endif | 923 | #endif |
diff --git a/apps/recorder/resize.h b/apps/recorder/resize.h index 2964fcd2a9..ef32066a0d 100644 --- a/apps/recorder/resize.h +++ b/apps/recorder/resize.h | |||
@@ -43,67 +43,61 @@ | |||
43 | #define MAX_SC_STACK_ALLOC 0 | 43 | #define MAX_SC_STACK_ALLOC 0 |
44 | #define HAVE_UPSCALER 1 | 44 | #define HAVE_UPSCALER 1 |
45 | 45 | ||
46 | #if defined(CPU_COLDFIRE) | 46 | #if defined(CPU_SH) |
47 | #define SC_MUL_INIT \ | 47 | /* perform 32x32->40 unsigned multiply, round off and return top 8 bits */ |
48 | unsigned long macsr_st = coldfire_get_macsr(); \ | 48 | static inline uint32_t sc_mul_u32_rnd(uint32_t m, uint32_t n) |
49 | coldfire_set_macsr(EMAC_UNSIGNED); | ||
50 | #define SC_MUL_END coldfire_set_macsr(macsr_st); | ||
51 | #define SC_MUL(x, y) \ | ||
52 | ({ \ | ||
53 | unsigned long t; \ | ||
54 | asm ("mac.l %[a], %[b], %%acc0\n\t" \ | ||
55 | "move.l %%accext01, %[t]\n\t" \ | ||
56 | "move.l #0, %%acc0\n\t" \ | ||
57 | : [t] "=r" (t) : [a] "r" (x), [b] "r" (y)); \ | ||
58 | t; \ | ||
59 | }) | ||
60 | #elif (CONFIG_CPU == SH7034) | ||
61 | /* multiply two unsigned 32 bit values and return the top 32 bit | ||
62 | * of the 64 bit result */ | ||
63 | static inline unsigned sc_mul32(unsigned a, unsigned b) | ||
64 | { | 49 | { |
65 | unsigned r, t1, t2, t3; | 50 | unsigned r, t1, t2, t3; |
66 | 51 | unsigned h = 1 << 15; | |
52 | /* notation: | ||
53 | m = ab, n = cd | ||
54 | final result is (((a *c) << 32) + ((b * c + a * d) << 16) + b * d + | ||
55 | (1 << 31)) >> 32 | ||
56 | */ | ||
67 | asm ( | 57 | asm ( |
68 | "swap.w %[a], %[t1] \n" /* t1 = ba */ | 58 | "swap.w %[m], %[t1]\n\t" /* t1 = ba */ |
69 | "mulu %[t1], %[b] \n" /* a * d */ | 59 | "mulu %[m], %[n]\n\t" /* b * d */ |
70 | "swap.w %[b], %[t3] \n" /* t3 = dc */ | 60 | "swap.w %[n], %[t3]\n\t" /* t3 = dc */ |
71 | "sts macl, %[t2] \n" /* t2 = a * d */ | 61 | "sts macl, %[r]\n\t" /* r = b * d */ |
72 | "mulu %[t1], %[t3] \n" /* a * c */ | 62 | "mulu %[m], %[t3]\n\t" /* b * c */ |
73 | "sts macl, %[r] \n" /* hi = a * c */ | 63 | "shlr16 %[r]\n\t" |
74 | "mulu %[a], %[t3] \n" /* b * c */ | 64 | "sts macl, %[t2]\n\t" /* t2 = b * c */ |
75 | "clrt \n" | 65 | "mulu %[t1], %[t3]\n\t" /* a * c */ |
76 | "sts macl, %[t3] \n" /* t3 = b * c */ | 66 | "add %[t2], %[r]\n\t" |
77 | "addc %[t2], %[t3] \n" /* t3 += t2, carry -> t2 */ | 67 | "sts macl, %[t3]\n\t" /* t3 = a * c */ |
78 | "movt %[t2] \n" | 68 | "mulu %[t1], %[n]\n\t" /* a * d */ |
79 | "mulu %[a], %[b] \n" /* b * d */ | 69 | "shll16 %[t3]\n\t" |
80 | "mov %[t3], %[t1] \n" /* t1t3 = t2t3 << 16 */ | 70 | "sts macl, %[t2]\n\t" /* t2 = a * d */ |
81 | "xtrct %[t2], %[t1] \n" | 71 | "add %[t2], %[r]\n\t" |
82 | "shll16 %[t3] \n" | 72 | "add %[t3], %[r]\n\t" /* r = ((b * d) >> 16) + (b * c + a * d) + |
83 | "sts macl, %[t2] \n" /* lo = b * d */ | 73 | ((a * c) << 16) */ |
84 | "clrt \n" /* hi.lo += t1t3 */ | 74 | "add %[h], %[r]\n\t" /* round result */ |
85 | "addc %[t3], %[t2] \n" | 75 | "shlr16 %[r]\n\t" /* truncate result */ |
86 | "addc %[t1], %[r] \n" | ||
87 | : /* outputs */ | 76 | : /* outputs */ |
88 | [r] "=&r"(r), | 77 | [r] "=&r"(r), |
89 | [t1]"=&r"(t1), | 78 | [t1]"=&r"(t1), |
90 | [t2]"=&r"(t2), | 79 | [t2]"=&r"(t2), |
91 | [t3]"=&r"(t3) | 80 | [t3]"=&r"(t3) |
92 | : /* inputs */ | 81 | : /* inputs */ |
93 | [a] "r" (a), | 82 | [h] "r" (h), |
94 | [b] "r" (b) | 83 | [m] "r" (m), |
84 | [n] "r" (n) | ||
95 | ); | 85 | ); |
96 | return r; | 86 | return r; |
97 | } | 87 | } |
98 | #define SC_MUL(x, y) sc_mul32(x, y) | 88 | #elif defined(TEST_SH_MATH) |
99 | #define SC_MUL_INIT | 89 | static inline uint32_t sc_mul_u32_rnd(uint32_t op1, uint32_t op2) |
100 | #define SC_MUL_END | 90 | { |
91 | uint64_t tmp = (uint64_t)op1 * op2; | ||
92 | tmp += 1LU << 31; | ||
93 | tmp >>= 32; | ||
94 | return tmp; | ||
95 | } | ||
96 | #else | ||
97 | #define SC_OUT(n, c) (((n) + (1 << 23)) >> 24) | ||
101 | #endif | 98 | #endif |
102 | 99 | #ifndef SC_OUT | |
103 | #ifndef SC_MUL | 100 | #define SC_OUT(n, c) (sc_mul_u32_rnd(n, (c)->recip)) |
104 | #define SC_MUL(x, y) ((x) * (uint64_t)(y) >> 32) | ||
105 | #define SC_MUL_INIT | ||
106 | #define SC_MUL_END | ||
107 | #endif | 101 | #endif |
108 | 102 | ||
109 | struct img_part { | 103 | struct img_part { |
@@ -130,8 +124,14 @@ struct uint32_rgb { | |||
130 | horizontal scaler, and row output | 124 | horizontal scaler, and row output |
131 | */ | 125 | */ |
132 | struct scaler_context { | 126 | struct scaler_context { |
133 | uint32_t divisor; | 127 | #if defined(CPU_SH) || defined(TEST_SH_MATH) |
134 | uint32_t round; | 128 | uint32_t recip; |
129 | #else | ||
130 | uint32_t h_i_val; | ||
131 | uint32_t h_o_val; | ||
132 | uint32_t v_i_val; | ||
133 | uint32_t v_o_val; | ||
134 | #endif | ||
135 | struct bitmap *bm; | 135 | struct bitmap *bm; |
136 | struct dim *src; | 136 | struct dim *src; |
137 | unsigned char *buf; | 137 | unsigned char *buf; |