From c4f88526c73b3da2bda03aa40de06b3535fe266b Mon Sep 17 00:00:00 2001 From: Jens Arnold Date: Fri, 4 Jun 2010 23:12:33 +0000 Subject: Port greylib blitting optimisation to clipv2 and Clip+. Actual speedup can't be measured because something is fishy with the cpu clocking (calculated load is negative??) git-svn-id: svn://svn.rockbox.org/rockbox/trunk@26562 a1c6a512-1295-4272-9138-f99709370657 --- .../arm/as3525/sansa-clipplus/lcd-as-clip-plus.S | 45 ++++++++-------------- .../target/arm/as3525/sansa-clipv2/lcd-as-clipv2.S | 45 ++++++++-------------- 2 files changed, 30 insertions(+), 60 deletions(-) diff --git a/firmware/target/arm/as3525/sansa-clipplus/lcd-as-clip-plus.S b/firmware/target/arm/as3525/sansa-clipplus/lcd-as-clip-plus.S index 4ffbb9252c..7dcdc9f0da 100644 --- a/firmware/target/arm/as3525/sansa-clipplus/lcd-as-clip-plus.S +++ b/firmware/target/arm/as3525/sansa-clipplus/lcd-as-clip-plus.S @@ -56,43 +56,28 @@ lcd_grey_data: ldr lr, =SSP_BASE .greyloop: - ldmia r1, {r3-r4} /* Fetch 8 pixel phases */ - ldmia r0!, {r5-r6} /* Fetch 8 pixel values */ - - mov r7, #0 - - /* set bits 7..4 */ - tst r3, #0x80 - orrne r7, r7, #0x80 - tst r3, #0x8000 - orrne r7, r7, #0x40 - tst r3, #0x800000 - orrne r7, r7, #0x20 - tst r3, #0x80000000 - orrne r7, r7, #0x10 + ldmia r1, {r3-r4} + + and r5, r12, r3 @ r5 = 3.......2.......1.......0....... + and r6, r12, r4 @ r6 = 7.......6.......5.......4....... + orr r5, r5, r6, lsr #4 @ r5 = 3...7...2...6...1...5...0...4... + orr r5, r5, r5, lsr #9 @ r5 = 3...7...23..67..12..56..01..45.. + orr r5, r5, r5, lsr #9 @ r5 = 3...7...23..67..123.567.012.456. + orr r5, r5, r5, lsr #9 @ r5 = 3...7...23..67..123.567.01234567 + + ldmia r0!, {r6-r7} bic r3, r3, r12 - add r3, r3, r5 - - /* set bits 3..0 */ - tst r4, #0x80 - orrne r7, r7, #0x08 - tst r4, #0x8000 - orrne r7, r7, #0x04 - tst r4, #0x800000 - orrne r7, r7, #0x02 - tst r4, #0x80000000 - orrne r7, r7, #0x01 + add r3, r3, r6 bic r4, r4, r12 - add r4, r4, r6 - + add r4, r4, r7 stmia r1!, {r3-r4} 1: - ldr r5, [lr, #0xC] @ SSP_SR - ands r5, r5, #(1<<1) @ wait until transmit fifo isn't full + ldr r6, [lr, #0xC] @ SSP_SR + ands r6, r6, #(1<<1) @ wait until transmit fifo isn't full beq 1b - strb r7, [lr, #0x08] @ SSP_DATA + strb r5, [lr, #0x08] @ SSP_DATA subs r2, r2, #1 bne .greyloop diff --git a/firmware/target/arm/as3525/sansa-clipv2/lcd-as-clipv2.S b/firmware/target/arm/as3525/sansa-clipv2/lcd-as-clipv2.S index 4281519491..23f1db3109 100644 --- a/firmware/target/arm/as3525/sansa-clipv2/lcd-as-clipv2.S +++ b/firmware/target/arm/as3525/sansa-clipv2/lcd-as-clipv2.S @@ -56,42 +56,27 @@ lcd_grey_data: ldr lr, =DBOP_BASE .greyloop: - ldmia r1, {r3-r4} /* Fetch 8 pixel phases */ - ldmia r0!, {r5-r6} /* Fetch 8 pixel values */ - - mov r7, #0 - - /* set bits 7..3 */ - tst r3, #0x80 - orrne r7, r7, #0x80 - tst r3, #0x8000 - orrne r7, r7, #0x40 - tst r3, #0x800000 - orrne r7, r7, #0x20 - tst r3, #0x80000000 - orrne r7, r7, #0x10 + ldmia r1, {r3-r4} + + and r5, r12, r3 @ r5 = 3.......2.......1.......0....... + and r6, r12, r4 @ r6 = 7.......6.......5.......4....... + orr r5, r5, r6, lsr #4 @ r5 = 3...7...2...6...1...5...0...4... + orr r5, r5, r5, lsr #9 @ r5 = 3...7...23..67..12..56..01..45.. + orr r5, r5, r5, lsr #9 @ r5 = 3...7...23..67..123.567.012.456. + orr r5, r5, r5, lsr #9 @ r5 = 3...7...23..67..123.567.01234567 + + ldmia r0!, {r6-r7} bic r3, r3, r12 - add r3, r3, r5 - - /* set bits 3..0 */ - tst r4, #0x80 - orrne r7, r7, #0x08 - tst r4, #0x8000 - orrne r7, r7, #0x04 - tst r4, #0x800000 - orrne r7, r7, #0x02 - tst r4, #0x80000000 - orrne r7, r7, #0x01 + add r3, r3, r6 bic r4, r4, r12 - add r4, r4, r6 - + add r4, r4, r7 stmia r1!, {r3-r4} - strb r7, [lr, #0x10] @ DBOP_DOUT + strb r5, [lr, #0x10] @ DBOP_DOUT 1: - ldr r5, [lr, #0xC] @ DBOP_STAT - ands r5, r5, #(1<<6) @ wait until push fifo is full + ldr r6, [lr, #0xC] @ DBOP_STAT + ands r6, r6, #(1<<6) @ wait until push fifo is full bne 1b subs r2, r2, #1 -- cgit v1.2.3