summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndree Buschmann <AndreeBuschmann@t-online.de>2008-04-13 17:03:24 +0000
committerAndree Buschmann <AndreeBuschmann@t-online.de>2008-04-13 17:03:24 +0000
commitb90a766d0195154170b693f030f044366a15ee2c (patch)
tree430a8d66d87db4fcf563160b347e6fcea35fe2bd
parent2bf4178018930b8af6d7082f7dd1a3302e09932b (diff)
downloadrockbox-b90a766d0195154170b693f030f044366a15ee2c.tar.gz
rockbox-b90a766d0195154170b693f030f044366a15ee2c.zip
ASM optimization for fiq_playback(). Saves about 0.4MHz of CPU while playback on PP502x/PP5002.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@17097 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--firmware/target/arm/pcm-pp.c84
1 files changed, 61 insertions, 23 deletions
diff --git a/firmware/target/arm/pcm-pp.c b/firmware/target/arm/pcm-pp.c
index 64c6d0cdc8..489862e5de 100644
--- a/firmware/target/arm/pcm-pp.c
+++ b/firmware/target/arm/pcm-pp.c
@@ -107,11 +107,17 @@ void pcm_apply_settings(void)
107 pcm_curr_sampr = pcm_freq; 107 pcm_curr_sampr = pcm_freq;
108} 108}
109 109
110/* ASM optimised FIQ handler. GCC fails to make use of the fact that FIQ mode 110/* ASM optimised FIQ handler. Checks for the minimum allowed loop cycles by evaluation of
111 has registers r8-r14 banked, and so does not need to be saved. This routine 111 * free IISFIFO-slots against available source buffer words. Through this it is possible to
112 uses only these registers, and so will never touch the stack unless it 112 * move the check for IIS_TX_FREE_COUNT outside the loop and do some further optimization.
113 actually needs to do so when calling pcm_callback_for_more. C version is 113 * Right after the loops (source buffer -> IISFIFO) are done we need to check whether we
114 still included below for reference and testing. 114 * have to exit FIQ handler (this must be done, if all free FIFO slots were filled) or
115 * we will have to get some new source data.
116 * Important information kept from former ASM implementation (not used anymore): GCC fails
117 * to make use of the fact that FIQ mode has registers r8-r14 banked, and so does not need
118 * to be saved. This routine uses only these registers, and so will never touch the stack
119 * unless it actually needs to do so when calling pcm_callback_for_more. C version is still
120 * included below for reference and testing.
115 */ 121 */
116#if 1 122#if 1
117void fiq_playback(void) ICODE_ATTR __attribute__((naked)); 123void fiq_playback(void) ICODE_ATTR __attribute__((naked));
@@ -122,9 +128,11 @@ void fiq_playback(void)
122 * addresses we need are generated by using offsets with these two. 128 * addresses we need are generated by using offsets with these two.
123 * r10 + 0x40 is IISFIFO_WR, and r10 + 0x0c is IISFIFO_CFG. 129 * r10 + 0x40 is IISFIFO_WR, and r10 + 0x0c is IISFIFO_CFG.
124 * r8 and r9 contain local copies of p and size respectively. 130 * r8 and r9 contain local copies of p and size respectively.
125 * r12 is a working register. 131 * r0-r3 and r12 are working registers.
126 */ 132 */
127 asm volatile ( 133 asm volatile (
134 "stmfd sp!, { r0-r3, lr } \n" /* stack scratch regs and lr */
135
128#if CONFIG_CPU == PP5002 136#if CONFIG_CPU == PP5002
129 "ldr r12, =0xcf001040 \n" /* Some magic from iPodLinux */ 137 "ldr r12, =0xcf001040 \n" /* Some magic from iPodLinux */
130 "ldr r12, [r12] \n" 138 "ldr r12, [r12] \n"
@@ -132,24 +140,54 @@ void fiq_playback(void)
132 "ldmia r11, { r8-r9 } \n" /* r8 = p, r9 = size */ 140 "ldmia r11, { r8-r9 } \n" /* r8 = p, r9 = size */
133 "cmp r9, #0 \n" /* is size 0? */ 141 "cmp r9, #0 \n" /* is size 0? */
134 "beq .more_data \n" /* if so, ask pcmbuf for more data */ 142 "beq .more_data \n" /* if so, ask pcmbuf for more data */
135 ".fifo_loop: \n" 143
136 "ldr r12, [r10, %[cfg]] \n" /* read IISFIFO_CFG to check FIFO status */
137 "ands r12, r12, %[mask] \n"
138 "beq .exit \n" /* FIFO full, exit */
139#if SAMPLE_SIZE == 16 144#if SAMPLE_SIZE == 16
140 "ldr r12, [r8], #4 \n" /* load two samples */ 145 ".check_fifo: \n"
141 "str r12, [r10, %[wr]] \n" /* write them */ 146 "ldr r0, [r10, %[cfg]] \n" /* read IISFIFO_CFG to check FIFO status */
147 "and r0, r0, %[mask] \n" /* r0 = IIS_TX_FREE_COUNT << 16 (PP502x) */
148
149 "mov r1, r0, lsr #16 \n" /* number of free FIFO slots */
150 "cmp r1, r9, lsr #2 \n" /* number of words from source */
151 "movgt r1, r9, lsr #2 \n" /* r1 = amount of allowed loops */
152 "sub r9, r9, r1, lsl #2 \n" /* r1 words will be written in following loop */
153
154 "subs r1, r1, #2 \n"
155 ".fifo_loop_2: \n"
156 "ldmgeia r8!, {r2, r12} \n" /* load four samples */
157 "strge r2 , [r10, %[wr]] \n" /* write sample 0-1 to IISFIFO_WR */
158 "strge r12, [r10, %[wr]] \n" /* write sample 2-3 to IISFIFO_WR */
159 "subges r1, r1, #2 \n" /* one more loop? */
160 "bge .fifo_loop_2 \n" /* yes, continue */
161
162 "tst r1, #1 \n" /* two samples (one word) left? */
163 "ldrne r12, [r8], #4 \n" /* load two samples */
164 "strne r12, [r10, %[wr]] \n" /* write sample 0-1 to IISFIFO_WR */
165
166 "cmp r9, #0 \n" /* either FIFO is full or source buffer is empty */
167 "bgt .exit \n" /* if source buffer is not empty, FIFO must be full */
142#elif SAMPLE_SIZE == 32 168#elif SAMPLE_SIZE == 32
169 ".check_fifo: \n"
170 "ldr r0, [r10, %[cfg]] \n" /* read IISFIFO_CFG to check FIFO status */
171 "and r0, r0, %[mask] \n" /* r0 = IIS_TX_FREE_COUNT << 23 (PP5002) */
172
173 "mov r1, r0, lsr #24 \n" /* number of free pairs of FIFO slots */
174 "cmp r1, r9, lsr #2 \n" /* number of words from source */
175 "movgt r1, r9, lsr #2 \n" /* r1 = amount of allowed loops */
176 "sub r9, r9, r1, lsl #2 \n" /* r1 words will be written in following loop */
177
178 ".fifo_loop: \n"
143 "ldr r12, [r8], #4 \n" /* load two samples */ 179 "ldr r12, [r8], #4 \n" /* load two samples */
144 "mov r12, r12, ror #16 \n" /* put left sample at the top bits */ 180 "mov r2 , r12, lsl #16 \n" /* put left sample at the top bits */
145 "str r12, [r10, %[wr]] \n" /* write top sample, lower sample ignored */ 181 "str r2 , [r10, %[wr]] \n" /* write top sample to IISFIFO_WR */
146 "mov r12, r12, lsl #16 \n" /* shift lower sample up */ 182 "str r12, [r10, %[wr]] \n" /* write low sample to IISFIFO_WR*/
147 "str r12, [r10, %[wr]] \n" /* then write it */ 183 "subs r1, r1, #1 \n" /* one more loop? */
184 "bgt .fifo_loop \n" /* yes, continue */
185
186 "cmp r9, #0 \n" /* either FIFO is full or source buffer is empty */
187 "bgt .exit \n" /* if source buffer is not empty, FIFO must be full */
148#endif 188#endif
149 "subs r9, r9, #4 \n" /* check if we have more samples */ 189
150 "bne .fifo_loop \n" /* yes, continue */
151 ".more_data: \n" 190 ".more_data: \n"
152 "stmfd sp!, { r0-r3, lr } \n" /* stack scratch regs and lr */
153 "ldr r2, =pcm_callback_for_more \n" 191 "ldr r2, =pcm_callback_for_more \n"
154 "ldr r2, [r2] \n" /* get callback address */ 192 "ldr r2, [r2] \n" /* get callback address */
155 "cmp r2, #0 \n" /* check for null pointer */ 193 "cmp r2, #0 \n" /* check for null pointer */
@@ -160,21 +198,21 @@ void fiq_playback(void)
160 "bxne r2 \n" 198 "bxne r2 \n"
161 "ldmia r11, { r8-r9 } \n" /* reload p and size */ 199 "ldmia r11, { r8-r9 } \n" /* reload p and size */
162 "cmp r9, #0 \n" /* did we actually get more data? */ 200 "cmp r9, #0 \n" /* did we actually get more data? */
163 "ldmnefd sp!, { r0-r3, lr } \n" 201 "bne .check_fifo \n"
164 "bne .fifo_loop \n" /* yes, continue to try feeding FIFO */
165 "ldr r12, =pcm_play_dma_stop \n" 202 "ldr r12, =pcm_play_dma_stop \n"
166 "mov lr, pc \n" 203 "mov lr, pc \n"
167 "bx r12 \n" 204 "bx r12 \n"
168 "ldr r12, =pcm_play_dma_stopped_callback \n" 205 "ldr r12, =pcm_play_dma_stopped_callback \n"
169 "mov lr, pc \n" 206 "mov lr, pc \n"
170 "bx r12 \n" 207 "bx r12 \n"
171 "ldmfd sp!, { r0-r3, lr } \n" 208
172 ".exit: \n" /* (r8=0 if stopping, look above) */ 209 ".exit: \n" /* (r8=0 if stopping, look above) */
173 "stmia r11, { r8-r9 } \n" /* save p and size */ 210 "stmia r11, { r8-r9 } \n" /* save p and size */
211 "ldmfd sp!, { r0-r3, lr } \n"
174 "subs pc, lr, #4 \n" /* FIQ specific return sequence */ 212 "subs pc, lr, #4 \n" /* FIQ specific return sequence */
175 ".ltorg \n" 213 ".ltorg \n"
176 : /* These must only be integers! No regs */ 214 : /* These must only be integers! No regs */
177 : [mask]"i"(IIS_TX_FREE_MASK & (IIS_TX_FREE_MASK-1)), 215 : [mask]"i"(IIS_TX_FREE_MASK),
178 [cfg]"i"((int)&IISFIFO_CFG - (int)&IISCONFIG), 216 [cfg]"i"((int)&IISFIFO_CFG - (int)&IISCONFIG),
179 [wr]"i"((int)&IISFIFO_WR - (int)&IISCONFIG) 217 [wr]"i"((int)&IISFIFO_WR - (int)&IISCONFIG)
180 ); 218 );