diff options
author | Jens Arnold <amiconn@rockbox.org> | 2009-06-07 21:27:05 +0000 |
---|---|---|
committer | Jens Arnold <amiconn@rockbox.org> | 2009-06-07 21:27:05 +0000 |
commit | 1d6df54df27cb41c02226678a2c8f9feddd1a1e0 (patch) | |
tree | 5fdc6dd98ac0208f5c3351b062063af6914cbefb /apps/recorder/jpeg_load.c | |
parent | c3182ec333982e961d3babfbdb1125fd5bac7fb8 (diff) | |
download | rockbox-1d6df54df27cb41c02226678a2c8f9feddd1a1e0.tar.gz rockbox-1d6df54df27cb41c02226678a2c8f9feddd1a1e0.zip |
Convert a number of places in core and plugins to use the BIT_N() macro instead of 1<<n. This speeds things up on SH1 and also reduces the core binary size. Most notable speedups: 1-bit LCD driver: drawpixel +20%, drawline +27%, hline +5%; JPEG viewer: +8% for 1/8 scaling. Other targets are unaffected.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@21205 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/recorder/jpeg_load.c')
-rw-r--r-- | apps/recorder/jpeg_load.c | 43 |
1 file changed, 22 insertions, 21 deletions
diff --git a/apps/recorder/jpeg_load.c b/apps/recorder/jpeg_load.c index 027ddaf5a8..3334019958 100644 --- a/apps/recorder/jpeg_load.c +++ b/apps/recorder/jpeg_load.c | |||
@@ -1506,7 +1506,8 @@ INLINE void fix_huff_tables(struct jpeg *p_jpeg) | |||
1506 | */ | 1506 | */ |
1507 | INLINE void fix_quant_tables(struct jpeg *p_jpeg) | 1507 | INLINE void fix_quant_tables(struct jpeg *p_jpeg) |
1508 | { | 1508 | { |
1509 | int shift, i, x, y, a; | 1509 | int shift, i, a; |
1510 | unsigned x, y; | ||
1510 | for (i = 0; i < 2; i++) | 1511 | for (i = 0; i < 2; i++) |
1511 | { | 1512 | { |
1512 | shift = idct_tbl[p_jpeg->v_scale[i]].v_scale + | 1513 | shift = idct_tbl[p_jpeg->v_scale[i]].v_scale + |
@@ -1514,9 +1515,9 @@ INLINE void fix_quant_tables(struct jpeg *p_jpeg) | |||
1514 | if (shift) | 1515 | if (shift) |
1515 | { | 1516 | { |
1516 | a = 0; | 1517 | a = 0; |
1517 | for (y = 0; y < 1 << p_jpeg->h_scale[i]; y++) | 1518 | for (y = 0; y < BIT_N(p_jpeg->h_scale[i]); y++) |
1518 | { | 1519 | { |
1519 | for (x = 0; x < 1 << p_jpeg->v_scale[i]; x++) | 1520 | for (x = 0; x < BIT_N(p_jpeg->v_scale[i]); x++) |
1520 | p_jpeg->quanttable[i][zig[a+x]] <<= shift; | 1521 | p_jpeg->quanttable[i][zig[a+x]] <<= shift; |
1521 | a += 8; | 1522 | a += 8; |
1522 | } | 1523 | } |
@@ -1586,7 +1587,7 @@ INLINE int get_bits(struct jpeg *p_jpeg, int nbits) | |||
1586 | #ifdef JPEG_BS_DEBUG | 1587 | #ifdef JPEG_BS_DEBUG |
1587 | if (nbits > p_jpeg->bitbuf_bits) | 1588 | if (nbits > p_jpeg->bitbuf_bits) |
1588 | DEBUGF("bitbuffer underrun\n"); | 1589 | DEBUGF("bitbuffer underrun\n"); |
1589 | int mask = 1 << (p_jpeg->bitbuf_bits - 1); | 1590 | int mask = BIT_N(p_jpeg->bitbuf_bits - 1); |
1590 | int i; | 1591 | int i; |
1591 | DEBUGF("get %d bits: ", nbits); | 1592 | DEBUGF("get %d bits: ", nbits); |
1592 | for (i = 0; i < nbits; i++) | 1593 | for (i = 0; i < nbits; i++) |
@@ -1594,13 +1595,13 @@ INLINE int get_bits(struct jpeg *p_jpeg, int nbits) | |||
1594 | DEBUGF("\n"); | 1595 | DEBUGF("\n"); |
1595 | #endif | 1596 | #endif |
1596 | return ((int) (p_jpeg->bitbuf >> (p_jpeg->bitbuf_bits -= nbits))) & | 1597 | return ((int) (p_jpeg->bitbuf >> (p_jpeg->bitbuf_bits -= nbits))) & |
1597 | ((1<<nbits)-1); | 1598 | (BIT_N(nbits)-1); |
1598 | } | 1599 | } |
1599 | 1600 | ||
1600 | INLINE int peek_bits(struct jpeg *p_jpeg, int nbits) | 1601 | INLINE int peek_bits(struct jpeg *p_jpeg, int nbits) |
1601 | { | 1602 | { |
1602 | #ifdef JPEG_BS_DEBUG | 1603 | #ifdef JPEG_BS_DEBUG |
1603 | int mask = 1 << (p_jpeg->bitbuf_bits - 1); | 1604 | int mask = BIT_N(p_jpeg->bitbuf_bits - 1); |
1604 | int i; | 1605 | int i; |
1605 | DEBUGF("peek %d bits: ", nbits); | 1606 | DEBUGF("peek %d bits: ", nbits); |
1606 | for (i = 0; i < nbits; i++) | 1607 | for (i = 0; i < nbits; i++) |
@@ -1608,13 +1609,13 @@ INLINE int peek_bits(struct jpeg *p_jpeg, int nbits) | |||
1608 | DEBUGF("\n"); | 1609 | DEBUGF("\n"); |
1609 | #endif | 1610 | #endif |
1610 | return ((int) (p_jpeg->bitbuf >> (p_jpeg->bitbuf_bits - nbits))) & | 1611 | return ((int) (p_jpeg->bitbuf >> (p_jpeg->bitbuf_bits - nbits))) & |
1611 | ((1<<nbits)-1); | 1612 | (BIT_N(nbits)-1); |
1612 | } | 1613 | } |
1613 | 1614 | ||
1614 | INLINE void drop_bits(struct jpeg *p_jpeg, int nbits) | 1615 | INLINE void drop_bits(struct jpeg *p_jpeg, int nbits) |
1615 | { | 1616 | { |
1616 | #ifdef JPEG_BS_DEBUG | 1617 | #ifdef JPEG_BS_DEBUG |
1617 | int mask = 1 << (p_jpeg->bitbuf_bits - 1); | 1618 | int mask = BIT_N(p_jpeg->bitbuf_bits - 1); |
1618 | int i; | 1619 | int i; |
1619 | DEBUGF("drop %d bits: ", nbits); | 1620 | DEBUGF("drop %d bits: ", nbits); |
1620 | for (i = 0; i < nbits; i++) | 1621 | for (i = 0; i < nbits; i++) |
@@ -1675,7 +1676,7 @@ static const int extend_offset[16] = /* entry n is (-1 << n) + 1 */ | |||
1675 | ({ \ | 1676 | ({ \ |
1676 | int x__ = x; \ | 1677 | int x__ = x; \ |
1677 | int s__ = s; \ | 1678 | int s__ = s; \ |
1678 | x__ & (1 << (s__- 1)) ? x__ : x__ + (-1 << s__) + 1; \ | 1679 | x__ & BIT_N(s__- 1) ? x__ : x__ + (-1 << s__) + 1; \ |
1679 | }) | 1680 | }) |
1680 | #endif | 1681 | #endif |
1681 | 1682 | ||
@@ -1764,14 +1765,14 @@ static struct img_part *store_row_jpeg(void *jpeg_args) | |||
1764 | #endif | 1765 | #endif |
1765 | unsigned int width = p_jpeg->x_mbl << mcu_hscale; | 1766 | unsigned int width = p_jpeg->x_mbl << mcu_hscale; |
1766 | unsigned int b_width = width * JPEG_PIX_SZ; | 1767 | unsigned int b_width = width * JPEG_PIX_SZ; |
1767 | int height = 1U << mcu_vscale; | 1768 | int height = BIT_N(mcu_vscale); |
1768 | int x; | 1769 | int x; |
1769 | if (!p_jpeg->mcu_row) /* Need to decode a new row of MCUs */ | 1770 | if (!p_jpeg->mcu_row) /* Need to decode a new row of MCUs */ |
1770 | { | 1771 | { |
1771 | p_jpeg->out_ptr = (unsigned char *)p_jpeg->img_buf; | 1772 | p_jpeg->out_ptr = (unsigned char *)p_jpeg->img_buf; |
1772 | int store_offs[4]; | 1773 | int store_offs[4]; |
1773 | #ifdef HAVE_LCD_COLOR | 1774 | #ifdef HAVE_LCD_COLOR |
1774 | unsigned mcu_width = 1U << mcu_hscale; | 1775 | unsigned mcu_width = BIT_N(mcu_hscale); |
1775 | #endif | 1776 | #endif |
1776 | int mcu_offset = JPEG_PIX_SZ << mcu_hscale; | 1777 | int mcu_offset = JPEG_PIX_SZ << mcu_hscale; |
1777 | unsigned char *out = p_jpeg->out_ptr; | 1778 | unsigned char *out = p_jpeg->out_ptr; |
@@ -1868,8 +1869,8 @@ static struct img_part *store_row_jpeg(void *jpeg_args) | |||
1868 | if (!ci) | 1869 | if (!ci) |
1869 | #endif | 1870 | #endif |
1870 | { | 1871 | { |
1871 | int idct_cols = 1 << MIN(p_jpeg->h_scale[!!ci], 3); | 1872 | int idct_cols = BIT_N(MIN(p_jpeg->h_scale[!!ci], 3)); |
1872 | int idct_rows = 1 << p_jpeg->v_scale[!!ci]; | 1873 | int idct_rows = BIT_N(p_jpeg->v_scale[!!ci]); |
1873 | unsigned char *b_out = out + (ci ? ci : store_offs[blkn]); | 1874 | unsigned char *b_out = out + (ci ? ci : store_offs[blkn]); |
1874 | if (idct_tbl[p_jpeg->v_scale[!!ci]].v_idct) | 1875 | if (idct_tbl[p_jpeg->v_scale[!!ci]].v_idct) |
1875 | idct_tbl[p_jpeg->v_scale[!!ci]].v_idct(block, | 1876 | idct_tbl[p_jpeg->v_scale[!!ci]].v_idct(block, |
@@ -2043,8 +2044,8 @@ int read_jpeg_fd(int fd, | |||
2043 | } | 2044 | } |
2044 | p_jpeg->h_scale[0] = calc_scale(p_jpeg->x_size, bm->width); | 2045 | p_jpeg->h_scale[0] = calc_scale(p_jpeg->x_size, bm->width); |
2045 | p_jpeg->v_scale[0] = calc_scale(p_jpeg->y_size, bm->height); | 2046 | p_jpeg->v_scale[0] = calc_scale(p_jpeg->y_size, bm->height); |
2046 | JDEBUGF("luma IDCT size: %dx%d\n", 1 << p_jpeg->h_scale[0], | 2047 | JDEBUGF("luma IDCT size: %dx%d\n", BIT_N(p_jpeg->h_scale[0]), |
2047 | 1 << p_jpeg->v_scale[0]); | 2048 | BIT_N(p_jpeg->v_scale[0])); |
2048 | if ((p_jpeg->x_size << p_jpeg->h_scale[0]) >> 3 == bm->width && | 2049 | if ((p_jpeg->x_size << p_jpeg->h_scale[0]) >> 3 == bm->width && |
2049 | (p_jpeg->y_size << p_jpeg->v_scale[0]) >> 3 == bm->height) | 2050 | (p_jpeg->y_size << p_jpeg->v_scale[0]) >> 3 == bm->height) |
2050 | resize = false; | 2051 | resize = false; |
@@ -2053,24 +2054,24 @@ int read_jpeg_fd(int fd, | |||
2053 | p_jpeg->frameheader[0].horizontal_sampling - 1; | 2054 | p_jpeg->frameheader[0].horizontal_sampling - 1; |
2054 | p_jpeg->v_scale[1] = p_jpeg->v_scale[0] + | 2055 | p_jpeg->v_scale[1] = p_jpeg->v_scale[0] + |
2055 | p_jpeg->frameheader[0].vertical_sampling - 1; | 2056 | p_jpeg->frameheader[0].vertical_sampling - 1; |
2056 | JDEBUGF("chroma IDCT size: %dx%d\n", 1 << p_jpeg->h_scale[1], | 2057 | JDEBUGF("chroma IDCT size: %dx%d\n", BIT_N(p_jpeg->h_scale[1]), |
2057 | 1 << p_jpeg->v_scale[1]); | 2058 | BIT_N(p_jpeg->v_scale[1])); |
2058 | #endif | 2059 | #endif |
2059 | JDEBUGF("scaling from %dx%d -> %dx%d\n", | 2060 | JDEBUGF("scaling from %dx%d -> %dx%d\n", |
2060 | (p_jpeg->x_size << p_jpeg->h_scale[0]) >> 3, | 2061 | (p_jpeg->x_size << p_jpeg->h_scale[0]) >> 3, |
2061 | (p_jpeg->y_size << p_jpeg->v_scale[0]) >> 3, | 2062 | (p_jpeg->y_size << p_jpeg->v_scale[0]) >> 3, |
2062 | bm->width, bm->height); | 2063 | bm->width, bm->height); |
2063 | fix_quant_tables(p_jpeg); | 2064 | fix_quant_tables(p_jpeg); |
2064 | int decode_w = (1 << p_jpeg->h_scale[0]) - 1; | 2065 | int decode_w = BIT_N(p_jpeg->h_scale[0]) - 1; |
2065 | int decode_h = (1 << p_jpeg->v_scale[0]) - 1; | 2066 | int decode_h = BIT_N(p_jpeg->v_scale[0]) - 1; |
2066 | src_dim.width = (p_jpeg->x_size << p_jpeg->h_scale[0]) >> 3; | 2067 | src_dim.width = (p_jpeg->x_size << p_jpeg->h_scale[0]) >> 3; |
2067 | src_dim.height = (p_jpeg->y_size << p_jpeg->v_scale[0]) >> 3; | 2068 | src_dim.height = (p_jpeg->y_size << p_jpeg->v_scale[0]) >> 3; |
2068 | p_jpeg->zero_need[0] = (decode_h << 3) + decode_w; | 2069 | p_jpeg->zero_need[0] = (decode_h << 3) + decode_w; |
2069 | p_jpeg->k_need[0] = zig[p_jpeg->zero_need[0]]; | 2070 | p_jpeg->k_need[0] = zig[p_jpeg->zero_need[0]]; |
2070 | JDEBUGF("need luma components to %d\n", p_jpeg->k_need[0]); | 2071 | JDEBUGF("need luma components to %d\n", p_jpeg->k_need[0]); |
2071 | #ifdef HAVE_LCD_COLOR | 2072 | #ifdef HAVE_LCD_COLOR |
2072 | decode_w = (1 << MIN(p_jpeg->h_scale[1],3)) - 1; | 2073 | decode_w = BIT_N(MIN(p_jpeg->h_scale[1],3)) - 1; |
2073 | decode_h = (1 << MIN(p_jpeg->v_scale[1],3)) - 1; | 2074 | decode_h = BIT_N(MIN(p_jpeg->v_scale[1],3)) - 1; |
2074 | p_jpeg->zero_need[1] = (decode_h << 3) + decode_w; | 2075 | p_jpeg->zero_need[1] = (decode_h << 3) + decode_w; |
2075 | p_jpeg->k_need[1] = zig[p_jpeg->zero_need[1]]; | 2076 | p_jpeg->k_need[1] = zig[p_jpeg->zero_need[1]]; |
2076 | JDEBUGF("need chroma components to %d\n", p_jpeg->k_need[1]); | 2077 | JDEBUGF("need chroma components to %d\n", p_jpeg->k_need[1]); |