From b90a766d0195154170b693f030f044366a15ee2c Mon Sep 17 00:00:00 2001 From: Andree Buschmann Date: Sun, 13 Apr 2008 17:03:24 +0000 Subject: ASM optimization for fiq_playback(). Saves about 0.4MHz of CPU while playback on PP502x/PP5002. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@17097 a1c6a512-1295-4272-9138-f99709370657 --- firmware/target/arm/pcm-pp.c | 84 ++++++++++++++++++++++++++++++++------------ 1 file changed, 61 insertions(+), 23 deletions(-) diff --git a/firmware/target/arm/pcm-pp.c b/firmware/target/arm/pcm-pp.c index 64c6d0cdc8..489862e5de 100644 --- a/firmware/target/arm/pcm-pp.c +++ b/firmware/target/arm/pcm-pp.c @@ -107,11 +107,17 @@ void pcm_apply_settings(void) pcm_curr_sampr = pcm_freq; } -/* ASM optimised FIQ handler. GCC fails to make use of the fact that FIQ mode - has registers r8-r14 banked, and so does not need to be saved. This routine - uses only these registers, and so will never touch the stack unless it - actually needs to do so when calling pcm_callback_for_more. C version is - still included below for reference and testing. +/* ASM optimised FIQ handler. Checks for the minimum allowed loop cycles by evaluation of + * free IISFIFO-slots against available source buffer words. Through this it is possible to + * move the check for IIS_TX_FREE_COUNT outside the loop and do some further optimization. + * Right after the loops (source buffer -> IISFIFO) are done we need to check whether we + * have to exit FIQ handler (this must be done, if all free FIFO slots were filled) or + * we will have to get some new source data. + * Important information kept from former ASM implementation (not used anymore): GCC fails + * to make use of the fact that FIQ mode has registers r8-r14 banked, and so does not need + * to be saved. This routine uses only these registers, and so will never touch the stack + * unless it actually needs to do so when calling pcm_callback_for_more. C version is still + * included below for reference and testing. 
*/ #if 1 void fiq_playback(void) ICODE_ATTR __attribute__((naked)); @@ -122,9 +128,11 @@ void fiq_playback(void) * addresses we need are generated by using offsets with these two. * r10 + 0x40 is IISFIFO_WR, and r10 + 0x0c is IISFIFO_CFG. * r8 and r9 contains local copies of p and size respectively. - * r12 is a working register. + * r0-r3 and r12 is a working register. */ asm volatile ( + "stmfd sp!, { r0-r3, lr } \n" /* stack scratch regs and lr */ + #if CONFIG_CPU == PP5002 "ldr r12, =0xcf001040 \n" /* Some magic from iPodLinux */ "ldr r12, [r12] \n" @@ -132,24 +140,54 @@ void fiq_playback(void) "ldmia r11, { r8-r9 } \n" /* r8 = p, r9 = size */ "cmp r9, #0 \n" /* is size 0? */ "beq .more_data \n" /* if so, ask pcmbuf for more data */ - ".fifo_loop: \n" - "ldr r12, [r10, %[cfg]] \n" /* read IISFIFO_CFG to check FIFO status */ - "ands r12, r12, %[mask] \n" - "beq .exit \n" /* FIFO full, exit */ + #if SAMPLE_SIZE == 16 - "ldr r12, [r8], #4 \n" /* load two samples */ - "str r12, [r10, %[wr]] \n" /* write them */ + ".check_fifo: \n" + "ldr r0, [r10, %[cfg]] \n" /* read IISFIFO_CFG to check FIFO status */ + "and r0, r0, %[mask] \n" /* r0 = IIS_TX_FREE_COUNT << 16 (PP502x) */ + + "mov r1, r0, lsr #16 \n" /* number of free FIFO slots */ + "cmp r1, r9, lsr #2 \n" /* number of words from source */ + "movgt r1, r9, lsr #2 \n" /* r1 = amount of allowed loops */ + "sub r9, r9, r1, lsl #2 \n" /* r1 words will be written in following loop */ + + "subs r1, r1, #2 \n" + ".fifo_loop_2: \n" + "ldmgeia r8!, {r2, r12} \n" /* load four samples */ + "strge r2 , [r10, %[wr]] \n" /* write sample 0-1 to IISFIFO_WR */ + "strge r12, [r10, %[wr]] \n" /* write sample 2-3 to IISFIFO_WR */ + "subges r1, r1, #2 \n" /* one more loop? */ + "bge .fifo_loop_2 \n" /* yes, continue */ + + "tst r1, #1 \n" /* two samples (one word) left? 
*/ + "ldrne r12, [r8], #4 \n" /* load two samples */ + "strne r12, [r10, %[wr]] \n" /* write sample 0-1 to IISFIFO_WR */ + + "cmp r9, #0 \n" /* either FIFO is full or source buffer is empty */ + "bgt .exit \n" /* if source buffer is not empty, FIFO must be full */ #elif SAMPLE_SIZE == 32 + ".check_fifo: \n" + "ldr r0, [r10, %[cfg]] \n" /* read IISFIFO_CFG to check FIFO status */ + "and r0, r0, %[mask] \n" /* r0 = IIS_TX_FREE_COUNT << 23 (PP5002) */ + + "mov r1, r0, lsr #24 \n" /* number of free pairs of FIFO slots */ + "cmp r1, r9, lsr #2 \n" /* number of words from source */ + "movgt r1, r9, lsr #2 \n" /* r1 = amount of allowed loops */ + "sub r9, r9, r1, lsl #2 \n" /* r1 words will be written in following loop */ + + ".fifo_loop: \n" "ldr r12, [r8], #4 \n" /* load two samples */ - "mov r12, r12, ror #16 \n" /* put left sample at the top bits */ - "str r12, [r10, %[wr]] \n" /* write top sample, lower sample ignored */ - "mov r12, r12, lsl #16 \n" /* shift lower sample up */ - "str r12, [r10, %[wr]] \n" /* then write it */ + "mov r2 , r12, lsl #16 \n" /* put left sample at the top bits */ + "str r2 , [r10, %[wr]] \n" /* write top sample to IISFIFO_WR */ + "str r12, [r10, %[wr]] \n" /* write low sample to IISFIFO_WR*/ + "subs r1, r1, #1 \n" /* one more loop? */ + "bgt .fifo_loop \n" /* yes, continue */ + + "cmp r9, #0 \n" /* either FIFO is full or source buffer is empty */ + "bgt .exit \n" /* if source buffer is not empty, FIFO must be full */ #endif - "subs r9, r9, #4 \n" /* check if we have more samples */ - "bne .fifo_loop \n" /* yes, continue */ + ".more_data: \n" - "stmfd sp!, { r0-r3, lr } \n" /* stack scratch regs and lr */ "ldr r2, =pcm_callback_for_more \n" "ldr r2, [r2] \n" /* get callback address */ "cmp r2, #0 \n" /* check for null pointer */ @@ -160,21 +198,21 @@ void fiq_playback(void) "bxne r2 \n" "ldmia r11, { r8-r9 } \n" /* reload p and size */ "cmp r9, #0 \n" /* did we actually get more data? 
*/ - "ldmnefd sp!, { r0-r3, lr } \n" - "bne .fifo_loop \n" /* yes, continue to try feeding FIFO */ + "bne .check_fifo \n" "ldr r12, =pcm_play_dma_stop \n" "mov lr, pc \n" "bx r12 \n" "ldr r12, =pcm_play_dma_stopped_callback \n" "mov lr, pc \n" "bx r12 \n" - "ldmfd sp!, { r0-r3, lr } \n" + ".exit: \n" /* (r8=0 if stopping, look above) */ "stmia r11, { r8-r9 } \n" /* save p and size */ + "ldmfd sp!, { r0-r3, lr } \n" "subs pc, lr, #4 \n" /* FIQ specific return sequence */ ".ltorg \n" : /* These must only be integers! No regs */ - : [mask]"i"(IIS_TX_FREE_MASK & (IIS_TX_FREE_MASK-1)), + : [mask]"i"(IIS_TX_FREE_MASK), [cfg]"i"((int)&IISFIFO_CFG - (int)&IISCONFIG), [wr]"i"((int)&IISFIFO_WR - (int)&IISCONFIG) ); -- cgit v1.2.3