From 02c031709c931da6f1ee9db0c6aadda2b37ae0aa Mon Sep 17 00:00:00 2001 From: Jens Arnold Date: Sun, 12 Jul 2009 13:14:35 +0000 Subject: * ARM asm DSP and codec/plugin functions: Use r12 scratch register properly * Fix saving another unused reg in dsp code * Use less regs in the generic ARM mpegplayer adding idct pure DC case * Fix ARMv6 mpegplayer adding idct using an unsaved register in pure DC case git-svn-id: svn://svn.rockbox.org/rockbox/trunk@21803 a1c6a512-1295-4272-9138-f99709370657 --- apps/codecs/lib/mdct_arm.S | 12 +++---- apps/codecs/libmad/synth_full_arm.S | 36 ++++++++++---------- apps/codecs/libmusepack/synth_filter_arm.S | 54 +++++++++++++++--------------- 3 files changed, 51 insertions(+), 51 deletions(-) (limited to 'apps/codecs') diff --git a/apps/codecs/lib/mdct_arm.S b/apps/codecs/lib/mdct_arm.S index f262951ea8..d082b4e932 100644 --- a/apps/codecs/lib/mdct_arm.S +++ b/apps/codecs/lib/mdct_arm.S @@ -119,7 +119,7 @@ mdct_butterfly_16: ldr pc, [sp], #4 mdct_butterfly_32: - stmdb sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} + stmdb sp!, {r4-r11, lr} add r1, r0, #16*4 @@ -247,13 +247,13 @@ mdct_butterfly_32: add r0, r0, #16*4 bl mdct_butterfly_16 - ldmia sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc} + ldmia sp!, {r4-r11, pc} @ mdct_butterfly_generic_loop(x1, x2, T0, step, Ttop) mdct_butterfly_generic_loop: - stmdb sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} + stmdb sp!, {r4-r11, lr} str r2, [sp, #-4] - ldr r4, [sp, #40] + ldr r4, [sp, #36] 1: ldmdb r0, {r6, r7, r8, r9} ldmdb r1, {r10, r11, r12, r14} @@ -339,7 +339,7 @@ mdct_butterfly_generic_loop: cmp r2, r4 bhi 1b - ldr r4, [sp, #40] + ldr r4, [sp, #36] 1: ldmdb r0, {r6, r7, r8, r9} ldmdb r1, {r10, r11, r12, r14} @@ -425,5 +425,5 @@ mdct_butterfly_generic_loop: cmp r2, r4 bhi 1b - ldmia sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc} + ldmia sp!, {r4-r11, pc} diff --git a/apps/codecs/libmad/synth_full_arm.S b/apps/codecs/libmad/synth_full_arm.S index 99a223e784..419bf2b96e 100644 --- a/apps/codecs/libmad/synth_full_arm.S +++ b/apps/codecs/libmad/synth_full_arm.S @@ -32,8 +32,8 @@ ;; r3 = D0ptr ;; r4 = D1ptr synth_full1: - stmdb sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} - ldr r4, [sp, #40] + stmdb sp!, {r4-r11, lr} + ldr r4, [sp, #36] ldr r5, =synth_full_sp str sp, [r5] mov r5, #15 @@ -135,11 +135,11 @@ synth_full1: ldr r5, =synth_full_sp ldr sp, [r5] - ldmia sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc} + ldmia sp!, {r4-r11, pc} synth_full2: - stmdb sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} - ldr r4, [sp, #40] + stmdb sp!, {r4-r11, lr} + ldr r4, [sp, #36] ldr r5, =synth_full_sp str sp, [r5] mov r5, #15 @@ -241,12 +241,12 @@ synth_full2: ldr r5, =synth_full_sp ldr sp, [r5] - ldmia sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc} + ldmia sp!, {r4-r11, pc} .global III_aliasreduce III_aliasreduce: - stmdb sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} + stmdb sp!, {r4-r11, lr} add r1, r0, r1, lsl #2 add r0, r0, #72 .arl1: @@ -289,7 +289,7 @@ III_aliasreduce: add r0, r0, #72 cmp r0, r1 blo .arl1 - ldmia sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc} + ldmia sp!, {r4-r11, pc} csa: .word +0x0db84a81 @@ -311,14 +311,14 @@ csa: .global III_overlap III_overlap: - stmdb sp!, {r4, r5, r6, r7, r8, lr} + stmdb sp!, {r4-r7, lr} add r2, r2, r3, lsl #2 mov r3, #6 .ol: ldmia r0!, {r4, r5, r6} - ldmia r1!, {r7, r8, lr} + ldmia r1!, {r7, r12, lr} add r4, r4, r7 - add r5, r5, r8 + add r5, r5, r12 add r6, r6, lr str r4, [r2], #128 str r5, [r2], #128 @@ -326,13 +326,13 @@ III_overlap: subs r3, r3, #1 bne .ol sub r1, r1, #72 - ldmia r0!, {r4, r5, r6, r7, r8, lr} - stmia r1!, {r4, r5, r6, r7, r8, lr} - ldmia r0!, {r4, r5, r6, r7, r8, lr} - stmia r1!, {r4, r5, r6, r7, r8, lr} - ldmia r0!, {r4, r5, r6, r7, r8, lr} - stmia r1!, {r4, r5, r6, r7, r8, lr} - ldmia sp!, {r4, r5, r6, r7, r8, pc} + ldmia r0!, {r4, r5, r6, r7, r12, lr} + stmia r1!, {r4, r5, r6, r7, r12, lr} + ldmia r0!, {r4, r5, r6, r7, r12, lr} + stmia r1!, {r4, r5, r6, r7, r12, lr} + ldmia r0!, {r4, r5, r6, r7, r12, lr} + stmia r1!, {r4, r5, r6, r7, r12, lr} + ldmia sp!, {r4-r7, pc} .section IBSS_SECTION_MPA_ARM,"aw",%nobits synth_full_sp: diff --git a/apps/codecs/libmusepack/synth_filter_arm.S b/apps/codecs/libmusepack/synth_filter_arm.S index c2b28bd06f..8c87b61609 100755 --- a/apps/codecs/libmusepack/synth_filter_arm.S +++ b/apps/codecs/libmusepack/synth_filter_arm.S @@ -9,10 +9,10 @@ * * Copyright (C) 2008 by Andree Buschmann * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. * * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY * KIND, either express or implied. @@ -41,7 +41,7 @@ mpc_decoder_windowing_D: /* r2 = D[] */ /* lr = counter */ - stmfd sp!, {r4-r12, lr} + stmfd sp!, {r4-r11, lr} mov lr, #32 .loop32: @@ -86,7 +86,7 @@ mpc_decoder_windowing_D: subs lr, lr, #1 bgt .loop32 - ldmfd sp!, {r4-r12, pc} + ldmfd sp!, {r4-r11, pc} .mpc_dewindowing_end: .size mpc_decoder_windowing_D,.mpc_dewindowing_end-mpc_decoder_windowing_D #else @@ -110,55 +110,55 @@ mpc_decoder_windowing_D: /************************************************************************ * Reference implementation. ***********************************************************************/ - stmfd sp!, {r4-r9, lr} + stmfd sp!, {r4-r8, lr} mov lr, #32 .loop32: ldmia r2!, { r3-r6 } /* load D[00..03] */ ldr r7, [r1] /* 0 */ - smull r8, r9, r7, r3 + smull r8, r12, r7, r3 ldr r7, [r1, #96*4] /* 1 */ - smlal r8, r9, r7, r4 + smlal r8, r12, r7, r4 ldr r7, [r1, #128*4] /* 2 */ - smlal r8, r9, r7, r5 + smlal r8, r12, r7, r5 ldr r7, [r1, #224*4] /* 3 */ - smlal r8, r9, r7, r6 + smlal r8, r12, r7, r6 ldmia r2!, { r3-r6 } /* load D[04..07] */ ldr r7, [r1, #256*4] /* 4 */ - smlal r8, r9, r7, r3 + smlal r8, r12, r7, r3 ldr r7, [r1, #352*4] /* 5 */ - smlal r8, r9, r7, r4 + smlal r8, r12, r7, r4 ldr r7, [r1, #384*4] /* 6 */ - smlal r8, r9, r7, r5 + smlal r8, r12, r7, r5 ldr r7, [r1, #480*4] /* 7 */ - smlal r8, r9, r7, r6 + smlal r8, r12, r7, r6 ldmia r2!, { r3-r6 } /* load D[08..11] */ ldr r7, [r1, #512*4] /* 8 */ - smlal r8, r9, r7, r3 + smlal r8, r12, r7, r3 ldr r7, [r1, #608*4] /* 9 */ - smlal r8, r9, r7, r4 + smlal r8, r12, r7, r4 ldr r7, [r1, #640*4] /* 10 */ - smlal r8, r9, r7, r5 + smlal r8, r12, r7, r5 ldr r7, [r1, #736*4] /* 11 */ - smlal r8, r9, r7, r6 + smlal r8, r12, r7, r6 ldmia r2!, { r3-r6 } /* load D[12..15] */ ldr r7, [r1, #768*4] /* 12 */ - smlal r8, r9, r7, r3 + smlal r8, r12, r7, r3 ldr r7, [r1, #864*4] /* 13 */ - smlal r8, r9, r7, r4 + smlal r8, r12, r7, r4 ldr r7, [r1, #896*4] /* 14 */ - smlal r8, r9, r7, r5 + smlal r8, r12, r7, r5 ldr r7, [r1, #992*4] /* 15 */ - smlal r8, r9, r7, r6 + smlal r8, r12, r7, r6 mov r8, r8, lsr #16 - orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */ + orr r8, r8, r12, lsl #16 /* (lo>>16) || (hi<<16) */ str r8, [r0], #4 /* store Data */ add r1, r1, #4 /* V++ */ subs lr, lr, #1 bgt .loop32 - ldmfd sp!, {r4-r9, pc} + ldmfd sp!, {r4-r8, pc} #else mpc_decoder_windowing_D: /* r0 = Data[] */ @@ -174,7 +174,7 @@ mpc_decoder_windowing_D: * The row V[16] can be extracted as it has symmetries within this single * row. 8 smull/mlal and 8 ldr's can be saved. ***********************************************************************/ - stmfd sp!, {r4-r12, lr} + stmfd sp!, {r4-r11, lr} /****************************************** * row 0 with internal symmetry @@ -356,7 +356,7 @@ mpc_decoder_windowing_D: str r8, [r0], #4 /* store Data */ add r1, r1, #4 /* V++ */ - ldmfd sp!, {r4-r12, pc} + ldmfd sp!, {r4-r11, pc} #endif .mpc_dewindowing_end: .size mpc_decoder_windowing_D,.mpc_dewindowing_end-mpc_decoder_windowing_D -- cgit v1.2.3