diff options
author | Andree Buschmann <AndreeBuschmann@t-online.de> | 2008-04-13 17:03:24 +0000 |
---|---|---|
committer | Andree Buschmann <AndreeBuschmann@t-online.de> | 2008-04-13 17:03:24 +0000 |
commit | b90a766d0195154170b693f030f044366a15ee2c (patch) | |
tree | 430a8d66d87db4fcf563160b347e6fcea35fe2bd /firmware | |
parent | 2bf4178018930b8af6d7082f7dd1a3302e09932b (diff) | |
download | rockbox-b90a766d0195154170b693f030f044366a15ee2c.tar.gz rockbox-b90a766d0195154170b693f030f044366a15ee2c.zip |
ASM optimization for fiq_playback(). Saves about 0.4MHz of CPU during playback on PP502x/PP5002.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@17097 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'firmware')
-rw-r--r-- | firmware/target/arm/pcm-pp.c | 84 |
1 files changed, 61 insertions, 23 deletions
diff --git a/firmware/target/arm/pcm-pp.c b/firmware/target/arm/pcm-pp.c index 64c6d0cdc8..489862e5de 100644 --- a/firmware/target/arm/pcm-pp.c +++ b/firmware/target/arm/pcm-pp.c | |||
@@ -107,11 +107,17 @@ void pcm_apply_settings(void) | |||
107 | pcm_curr_sampr = pcm_freq; | 107 | pcm_curr_sampr = pcm_freq; |
108 | } | 108 | } |
109 | 109 | ||
110 | /* ASM optimised FIQ handler. GCC fails to make use of the fact that FIQ mode | 110 | /* ASM optimised FIQ handler. Checks for the minimum allowed loop cycles by evaluation of |
111 | has registers r8-r14 banked, and so does not need to be saved. This routine | 111 | * free IISFIFO-slots against available source buffer words. Through this it is possible to |
112 | uses only these registers, and so will never touch the stack unless it | 112 | * move the check for IIS_TX_FREE_COUNT outside the loop and do some further optimization. |
113 | actually needs to do so when calling pcm_callback_for_more. C version is | 113 | * Right after the loops (source buffer -> IISFIFO) are done we need to check whether we |
114 | still included below for reference and testing. | 114 | * have to exit FIQ handler (this must be done, if all free FIFO slots were filled) or |
115 | * we will have to get some new source data. | ||
116 | * Important information kept from former ASM implementation (not used anymore): GCC fails | ||
117 | * to make use of the fact that FIQ mode has registers r8-r14 banked, and so does not need | ||
118 | * to be saved. This routine uses only these registers, and so will never touch the stack | ||
119 | * unless it actually needs to do so when calling pcm_callback_for_more. C version is still | ||
120 | * included below for reference and testing. | ||
115 | */ | 121 | */ |
116 | #if 1 | 122 | #if 1 |
117 | void fiq_playback(void) ICODE_ATTR __attribute__((naked)); | 123 | void fiq_playback(void) ICODE_ATTR __attribute__((naked)); |
@@ -122,9 +128,11 @@ void fiq_playback(void) | |||
122 | * addresses we need are generated by using offsets with these two. | 128 | * addresses we need are generated by using offsets with these two. |
123 | * r10 + 0x40 is IISFIFO_WR, and r10 + 0x0c is IISFIFO_CFG. | 129 | * r10 + 0x40 is IISFIFO_WR, and r10 + 0x0c is IISFIFO_CFG. |
124 | * r8 and r9 contain local copies of p and size respectively. | 130 | * r8 and r9 contain local copies of p and size respectively. |
125 | * r12 is a working register. | 131 | * r0-r3 and r12 are working registers. |
126 | */ | 132 | */ |
127 | asm volatile ( | 133 | asm volatile ( |
134 | "stmfd sp!, { r0-r3, lr } \n" /* stack scratch regs and lr */ | ||
135 | |||
128 | #if CONFIG_CPU == PP5002 | 136 | #if CONFIG_CPU == PP5002 |
129 | "ldr r12, =0xcf001040 \n" /* Some magic from iPodLinux */ | 137 | "ldr r12, =0xcf001040 \n" /* Some magic from iPodLinux */ |
130 | "ldr r12, [r12] \n" | 138 | "ldr r12, [r12] \n" |
@@ -132,24 +140,54 @@ void fiq_playback(void) | |||
132 | "ldmia r11, { r8-r9 } \n" /* r8 = p, r9 = size */ | 140 | "ldmia r11, { r8-r9 } \n" /* r8 = p, r9 = size */ |
133 | "cmp r9, #0 \n" /* is size 0? */ | 141 | "cmp r9, #0 \n" /* is size 0? */ |
134 | "beq .more_data \n" /* if so, ask pcmbuf for more data */ | 142 | "beq .more_data \n" /* if so, ask pcmbuf for more data */ |
135 | ".fifo_loop: \n" | 143 | |
136 | "ldr r12, [r10, %[cfg]] \n" /* read IISFIFO_CFG to check FIFO status */ | ||
137 | "ands r12, r12, %[mask] \n" | ||
138 | "beq .exit \n" /* FIFO full, exit */ | ||
139 | #if SAMPLE_SIZE == 16 | 144 | #if SAMPLE_SIZE == 16 |
140 | "ldr r12, [r8], #4 \n" /* load two samples */ | 145 | ".check_fifo: \n" |
141 | "str r12, [r10, %[wr]] \n" /* write them */ | 146 | "ldr r0, [r10, %[cfg]] \n" /* read IISFIFO_CFG to check FIFO status */ |
147 | "and r0, r0, %[mask] \n" /* r0 = IIS_TX_FREE_COUNT << 16 (PP502x) */ | ||
148 | |||
149 | "mov r1, r0, lsr #16 \n" /* number of free FIFO slots */ | ||
150 | "cmp r1, r9, lsr #2 \n" /* number of words from source */ | ||
151 | "movgt r1, r9, lsr #2 \n" /* r1 = amount of allowed loops */ | ||
152 | "sub r9, r9, r1, lsl #2 \n" /* r1 words will be written in following loop */ | ||
153 | |||
154 | "subs r1, r1, #2 \n" | ||
155 | ".fifo_loop_2: \n" | ||
156 | "ldmgeia r8!, {r2, r12} \n" /* load four samples */ | ||
157 | "strge r2 , [r10, %[wr]] \n" /* write sample 0-1 to IISFIFO_WR */ | ||
158 | "strge r12, [r10, %[wr]] \n" /* write sample 2-3 to IISFIFO_WR */ | ||
159 | "subges r1, r1, #2 \n" /* one more loop? */ | ||
160 | "bge .fifo_loop_2 \n" /* yes, continue */ | ||
161 | |||
162 | "tst r1, #1 \n" /* two samples (one word) left? */ | ||
163 | "ldrne r12, [r8], #4 \n" /* load two samples */ | ||
164 | "strne r12, [r10, %[wr]] \n" /* write sample 0-1 to IISFIFO_WR */ | ||
165 | |||
166 | "cmp r9, #0 \n" /* either FIFO is full or source buffer is empty */ | ||
167 | "bgt .exit \n" /* if source buffer is not empty, FIFO must be full */ | ||
142 | #elif SAMPLE_SIZE == 32 | 168 | #elif SAMPLE_SIZE == 32 |
169 | ".check_fifo: \n" | ||
170 | "ldr r0, [r10, %[cfg]] \n" /* read IISFIFO_CFG to check FIFO status */ | ||
171 | "and r0, r0, %[mask] \n" /* r0 = IIS_TX_FREE_COUNT << 23 (PP5002) */ | ||
172 | |||
173 | "mov r1, r0, lsr #24 \n" /* number of free pairs of FIFO slots */ | ||
174 | "cmp r1, r9, lsr #2 \n" /* number of words from source */ | ||
175 | "movgt r1, r9, lsr #2 \n" /* r1 = amount of allowed loops */ | ||
176 | "sub r9, r9, r1, lsl #2 \n" /* r1 words will be written in following loop */ | ||
177 | |||
178 | ".fifo_loop: \n" | ||
143 | "ldr r12, [r8], #4 \n" /* load two samples */ | 179 | "ldr r12, [r8], #4 \n" /* load two samples */ |
144 | "mov r12, r12, ror #16 \n" /* put left sample at the top bits */ | 180 | "mov r2 , r12, lsl #16 \n" /* put left sample at the top bits */ |
145 | "str r12, [r10, %[wr]] \n" /* write top sample, lower sample ignored */ | 181 | "str r2 , [r10, %[wr]] \n" /* write top sample to IISFIFO_WR */ |
146 | "mov r12, r12, lsl #16 \n" /* shift lower sample up */ | 182 | "str r12, [r10, %[wr]] \n" /* write low sample to IISFIFO_WR*/ |
147 | "str r12, [r10, %[wr]] \n" /* then write it */ | 183 | "subs r1, r1, #1 \n" /* one more loop? */ |
184 | "bgt .fifo_loop \n" /* yes, continue */ | ||
185 | |||
186 | "cmp r9, #0 \n" /* either FIFO is full or source buffer is empty */ | ||
187 | "bgt .exit \n" /* if source buffer is not empty, FIFO must be full */ | ||
148 | #endif | 188 | #endif |
149 | "subs r9, r9, #4 \n" /* check if we have more samples */ | 189 | |
150 | "bne .fifo_loop \n" /* yes, continue */ | ||
151 | ".more_data: \n" | 190 | ".more_data: \n" |
152 | "stmfd sp!, { r0-r3, lr } \n" /* stack scratch regs and lr */ | ||
153 | "ldr r2, =pcm_callback_for_more \n" | 191 | "ldr r2, =pcm_callback_for_more \n" |
154 | "ldr r2, [r2] \n" /* get callback address */ | 192 | "ldr r2, [r2] \n" /* get callback address */ |
155 | "cmp r2, #0 \n" /* check for null pointer */ | 193 | "cmp r2, #0 \n" /* check for null pointer */ |
@@ -160,21 +198,21 @@ void fiq_playback(void) | |||
160 | "bxne r2 \n" | 198 | "bxne r2 \n" |
161 | "ldmia r11, { r8-r9 } \n" /* reload p and size */ | 199 | "ldmia r11, { r8-r9 } \n" /* reload p and size */ |
162 | "cmp r9, #0 \n" /* did we actually get more data? */ | 200 | "cmp r9, #0 \n" /* did we actually get more data? */ |
163 | "ldmnefd sp!, { r0-r3, lr } \n" | 201 | "bne .check_fifo \n" |
164 | "bne .fifo_loop \n" /* yes, continue to try feeding FIFO */ | ||
165 | "ldr r12, =pcm_play_dma_stop \n" | 202 | "ldr r12, =pcm_play_dma_stop \n" |
166 | "mov lr, pc \n" | 203 | "mov lr, pc \n" |
167 | "bx r12 \n" | 204 | "bx r12 \n" |
168 | "ldr r12, =pcm_play_dma_stopped_callback \n" | 205 | "ldr r12, =pcm_play_dma_stopped_callback \n" |
169 | "mov lr, pc \n" | 206 | "mov lr, pc \n" |
170 | "bx r12 \n" | 207 | "bx r12 \n" |
171 | "ldmfd sp!, { r0-r3, lr } \n" | 208 | |
172 | ".exit: \n" /* (r8=0 if stopping, look above) */ | 209 | ".exit: \n" /* (r8=0 if stopping, look above) */ |
173 | "stmia r11, { r8-r9 } \n" /* save p and size */ | 210 | "stmia r11, { r8-r9 } \n" /* save p and size */ |
211 | "ldmfd sp!, { r0-r3, lr } \n" | ||
174 | "subs pc, lr, #4 \n" /* FIQ specific return sequence */ | 212 | "subs pc, lr, #4 \n" /* FIQ specific return sequence */ |
175 | ".ltorg \n" | 213 | ".ltorg \n" |
176 | : /* These must only be integers! No regs */ | 214 | : /* These must only be integers! No regs */ |
177 | : [mask]"i"(IIS_TX_FREE_MASK & (IIS_TX_FREE_MASK-1)), | 215 | : [mask]"i"(IIS_TX_FREE_MASK), |
178 | [cfg]"i"((int)&IISFIFO_CFG - (int)&IISCONFIG), | 216 | [cfg]"i"((int)&IISFIFO_CFG - (int)&IISCONFIG), |
179 | [wr]"i"((int)&IISFIFO_WR - (int)&IISCONFIG) | 217 | [wr]"i"((int)&IISFIFO_WR - (int)&IISCONFIG) |
180 | ); | 218 | ); |