From 86429dbf1eca8ee0e08176997f508647c3abf6bd Mon Sep 17 00:00:00 2001 From: Chris Chua Date: Sun, 19 Mar 2023 06:22:08 +1100 Subject: Using ARM Unified Assembler Language Change-Id: Iae32a8ba8eff6087330e458fafc912a12fee4509 --- apps/plugins/mpegplayer/libmpeg2/idct_arm.S | 6 ++-- apps/plugins/mpegplayer/libmpeg2/idct_armv6.S | 7 +++-- apps/plugins/mpegplayer/mpeg_misc.h | 22 ++++++++------- apps/recorder/jpeg_idct_arm.S | 12 ++++---- firmware/asm/arm/corelock.c | 3 +- firmware/asm/arm/lcd-as-memframe.S | 4 +-- firmware/asm/arm/memcpy.S | 20 +++++++------- firmware/asm/arm/memmove.S | 20 +++++++------- firmware/asm/arm/memset.S | 26 +++++++++--------- firmware/asm/arm/memset16.S | 20 +++++++------- firmware/asm/arm/thread.c | 5 ++-- firmware/export/config.h | 5 ++-- firmware/target/arm/ata-as-arm.S | 32 +++++++++++----------- firmware/target/arm/ipod/video/lcd-as-video.S | 26 +++++++++--------- firmware/target/arm/pcm-telechips.c | 5 ++-- firmware/target/arm/pp/pcm-pp.c | 7 +++-- lib/arm_support/support-arm.S | 2 +- lib/rbcodec/codecs/demac/libdemac/udiv32_arm.S | 2 +- .../codecs/demac/libdemac/vector_math16_armv6.h | 23 +++++++++------- lib/rbcodec/codecs/libtta/filter_arm.S | 4 +-- lib/rbcodec/dsp/dsp_arm.S | 6 ++-- lib/rbcodec/dsp/dsp_arm_v6.S | 5 ++-- lib/unwarminder/safe_read.S | 4 +-- 23 files changed, 139 insertions(+), 127 deletions(-) diff --git a/apps/plugins/mpegplayer/libmpeg2/idct_arm.S b/apps/plugins/mpegplayer/libmpeg2/idct_arm.S index 97a87a8b59..90eb5031c7 100644 --- a/apps/plugins/mpegplayer/libmpeg2/idct_arm.S +++ b/apps/plugins/mpegplayer/libmpeg2/idct_arm.S @@ -43,8 +43,8 @@ ldrsh r7, [r0, #12] /* d2 */ ldrsh r8, [r0, #14] /* d3 */ orrs r9, r2, r3 - orreqs r9, r4, r5 - orreqs r9, r6, r7 + orrseq r9, r4, r5 + orrseq r9, r6, r7 cmpeq r8, #0 bne 2f mov r1, r1, asl #15 @@ -320,7 +320,7 @@ mpeg2_idct_copy: mpeg2_idct_add: cmp r0, #129 mov r0, r1 - ldreqsh r1, [r0, #0] + ldrsheq r1, [r0, #0] bne 1f and r1, r1, #0x70 cmp r1, #0x40 diff --git a/apps/plugins/mpegplayer/libmpeg2/idct_armv6.S b/apps/plugins/mpegplayer/libmpeg2/idct_armv6.S index dc53cbd7bd..a259721410 100644 --- a/apps/plugins/mpegplayer/libmpeg2/idct_armv6.S +++ b/apps/plugins/mpegplayer/libmpeg2/idct_armv6.S @@ -19,6 +19,7 @@ * ****************************************************************************/ +#include "config.h" .global mpeg2_idct_copy .type mpeg2_idct_copy, %function @@ -228,7 +229,7 @@ mpeg2_idct_copy: mpeg2_idct_add: cmp r0, #129 mov r0, r1 - ldreqsh r1, [r0, #0] + ldrsheq r1, [r0, #0] bne 1f and r1, r1, #0x70 cmp r1, #0x40 @@ -260,7 +261,7 @@ mpeg2_idct_add: strd r4, [r1] @ r4, r5 add r1, r1, r2 cmp r0, r3 - ldrlod r8, [r1] @ r8, r9 + ldrdlo r8, [r1] @ r8, r9 blo 2b ldmfd sp!, {r4-r11, pc} @@ -291,7 +292,7 @@ mpeg2_idct_add: strd r0, [r2] @ r0, r1 add r2, r2, r3 cmp r2, r12 - ldrlod r0, [r2] @ r0, r1 + ldrdlo r0, [r2] @ r0, r1 blo 4b ldmfd sp!, {r4, pc} diff --git a/apps/plugins/mpegplayer/mpeg_misc.h b/apps/plugins/mpegplayer/mpeg_misc.h index e04db0e19d..68ee8cac3c 100644 --- a/apps/plugins/mpegplayer/mpeg_misc.h +++ b/apps/plugins/mpegplayer/mpeg_misc.h @@ -53,12 +53,13 @@ enum state_enum #define CMP_3_CONST(_a, _b) \ ({ int _x; \ asm volatile ( \ + ".syntax unified \n" \ "ldrb %[x], [%[a], #0] \n" \ "eors %[x], %[x], %[b0] \n" \ - "ldreqb %[x], [%[a], #1] \n" \ - "eoreqs %[x], %[x], %[b1] \n" \ - "ldreqb %[x], [%[a], #2] \n" \ - "eoreqs %[x], %[x], %[b2] \n" \ + "ldrbeq %[x], [%[a], #1] \n" \ + "eorseq %[x], %[x], %[b1] \n" \ + "ldrbeq %[x], [%[a], #2] \n" \ + "eorseq %[x], %[x], %[b2] \n" \ : [x]"=&r"(_x) \ : [a]"r"(_a), \ [b0]"i"(((_b) >> 24) & 0xff), \ @@ -70,14 +71,15 @@ enum state_enum #define CMP_4_CONST(_a, _b) \ ({ int _x; \ asm volatile ( \ + ".syntax unified \n" \ "ldrb %[x], [%[a], #0] \n" \ "eors %[x], %[x], %[b0] \n" \ - "ldreqb %[x], [%[a], #1] \n" \ - "eoreqs %[x], %[x], %[b1] \n" \ - "ldreqb %[x], [%[a], #2] \n" \ - "eoreqs %[x], %[x], %[b2] \n" \ - "ldreqb %[x], [%[a], #3] \n" \ - "eoreqs %[x], %[x], %[b3] \n" \ + "ldrbeq %[x], [%[a], #1] \n" \ + "eorseq %[x], %[x], %[b1] \n" \ + "ldrbeq %[x], [%[a], #2] \n" \ + "eorseq %[x], %[x], %[b2] \n" \ + "ldrbeq %[x], [%[a], #3] \n" \ + "eorseq %[x], %[x], %[b3] \n" \ : [x]"=&r"(_x) \ : [a]"r"(_a), \ [b0]"i"(((_b) >> 24) & 0xff), \ diff --git a/apps/recorder/jpeg_idct_arm.S b/apps/recorder/jpeg_idct_arm.S index e7eb4b87f1..1f2603da1b 100644 --- a/apps/recorder/jpeg_idct_arm.S +++ b/apps/recorder/jpeg_idct_arm.S @@ -410,7 +410,7 @@ jpeg_idct8v: #if ARM_ARCH < 5 mov r8, r4, lsl #16 orrs r9, r6, r7 - orreqs r9, r5, r4, lsr #16 + orrseq r9, r5, r4, lsr #16 bne 2f mov r8, r8, asr #14 strh r8, [r2] @@ -505,7 +505,7 @@ jpeg_idct8v: #else /* ARMv5+ */ mov r12, r4, lsl #16 orrs r9, r6, r7 - orreqs r9, r5, r4, lsr #16 + orrseq r9, r5, r4, lsr #16 bne 2f mov r12, r12, asr #14 strh r12, [r2] @@ -615,7 +615,7 @@ jpeg_idct8h: #if ARM_ARCH < 5 add r8, r14, r4, lsl #16 orrs r9, r6, r7 - orreqs r9, r5, r4, lsr #16 + orrseq r9, r5, r4, lsr #16 bne 2f mov r8, r8, asr #21 cmp r8, #255 @@ -727,7 +727,7 @@ jpeg_idct8h: #else /* ARMv5+ */ add r12, r14, r4, lsl #16 orrs r9, r6, r7 - orreqs r9, r5, r4, lsr #16 + orrseq r9, r5, r4, lsr #16 bne 2f mov r12, r12, asr #21 cmp r12, #255 @@ -835,7 +835,7 @@ jpeg_idct8v: 1: ldmia r0!, { r4-r7 } orrs r9, r6, r7 - orreqs r9, r5, r4, lsr #16 + orrseq r9, r5, r4, lsr #16 bne 2f mov r4, r4, lsl #2 strh r4, [r2] @@ -939,7 +939,7 @@ jpeg_idct8h: ldmia r0!, { r4-r7 } sadd16 r4, r4, r14 orrs r9, r6, r7 - orreqs r9, r5, r4, lsr #16 + orrseq r9, r5, r4, lsr #16 bne 2f sxth r4, r4 usat r4, #8, r4, asr #5 diff --git a/firmware/asm/arm/corelock.c b/firmware/asm/arm/corelock.c index b36a40b45b..07ec77a60e 100644 --- a/firmware/asm/arm/corelock.c +++ b/firmware/asm/arm/corelock.c @@ -61,6 +61,7 @@ int corelock_try_lock(struct corelock *cl) /* Relies on the fact that core IDs are complementary bitmasks (0x55,0xaa) */ asm volatile ( + ".syntax unified \n" "mov r1, %[id] \n" /* r1 = PROCESSOR_ID */ "ldrb r1, [r1] \n" "strb r1, [%[cl], r1, lsr #7] \n" /* cl->myl[core] = core */ @@ -71,7 +72,7 @@ int corelock_try_lock(struct corelock *cl) "bne 1f \n" /* yes? lock acquired */ "ldrb %[rv], [%[cl], #2] \n" /* || cl->turn == core? */ "ands %[rv], %[rv], r1 \n" - "streqb %[rv], [%[cl], r1, lsr #7] \n" /* if not, cl->myl[core] = 0 */ + "strbeq %[rv], [%[cl], r1, lsr #7] \n" /* if not, cl->myl[core] = 0 */ "1: \n" /* Done */ : [rv] "=r"(rval) : [id] "i" (&PROCESSOR_ID), [cl] "r" (cl) diff --git a/firmware/asm/arm/lcd-as-memframe.S b/firmware/asm/arm/lcd-as-memframe.S index 52ab0447c2..d42b2a920d 100644 --- a/firmware/asm/arm/lcd-as-memframe.S +++ b/firmware/asm/arm/lcd-as-memframe.S @@ -91,9 +91,9 @@ lcd_copy_buffer_rect: @ stmia r0!, { r6-r12, r14 } @ bgt 30b @ octword loop @ 40: @ finish line @ - ldreqh r6, [r1], #2 @ finish last halfword if eq ... + ldrheq r6, [r1], #2 @ finish last halfword if eq ... add r1, r1, r4, lsl #1 @ - streqh r6, [r0], #2 @ ... + strheq r6, [r0], #2 @ ... add r0, r0, r4, lsl #1 @ subs r3, r3, #1 @ next line bgt 10b @ copy line @ diff --git a/firmware/asm/arm/memcpy.S b/firmware/asm/arm/memcpy.S index 83d43293e6..86fc6b7930 100644 --- a/firmware/asm/arm/memcpy.S +++ b/firmware/asm/arm/memcpy.S @@ -99,22 +99,22 @@ memcpy: 7: ldmfd sp!, {r5 - r8} 8: movs r2, r2, lsl #31 - ldrneb r3, [r1], #1 - ldrcsb r4, [r1], #1 - ldrcsb ip, [r1] - strneb r3, [r0], #1 - strcsb r4, [r0], #1 - strcsb ip, [r0] + ldrbne r3, [r1], #1 + ldrbcs r4, [r1], #1 + ldrbcs ip, [r1] + strbne r3, [r0], #1 + strbcs r4, [r0], #1 + strbcs ip, [r0] ldmpc regs="r0, r4" 9: rsb ip, ip, #4 cmp ip, #2 - ldrgtb r3, [r1], #1 - ldrgeb r4, [r1], #1 + ldrbgt r3, [r1], #1 + ldrbge r4, [r1], #1 ldrb lr, [r1], #1 - strgtb r3, [r0], #1 - strgeb r4, [r0], #1 + strbgt r3, [r0], #1 + strbge r4, [r0], #1 subs r2, r2, ip strb lr, [r0], #1 blt 8b diff --git a/firmware/asm/arm/memmove.S b/firmware/asm/arm/memmove.S index d8cab048be..e5c9b42928 100644 --- a/firmware/asm/arm/memmove.S +++ b/firmware/asm/arm/memmove.S @@ -106,20 +106,20 @@ memmove: 7: ldmfd sp!, {r5 - r8} 8: movs r2, r2, lsl #31 - ldrneb r3, [r1, #-1]! - ldrcsb r4, [r1, #-1]! - ldrcsb ip, [r1, #-1] - strneb r3, [r0, #-1]! - strcsb r4, [r0, #-1]! - strcsb ip, [r0, #-1] + ldrbne r3, [r1, #-1]! + ldrbcs r4, [r1, #-1]! + ldrbcs ip, [r1, #-1] + strbne r3, [r0, #-1]! + strbcs r4, [r0, #-1]! + strbcs ip, [r0, #-1] ldmpc regs="r0, r4" 9: cmp ip, #2 - ldrgtb r3, [r1, #-1]! - ldrgeb r4, [r1, #-1]! + ldrbgt r3, [r1, #-1]! + ldrbge r4, [r1, #-1]! ldrb lr, [r1, #-1]! - strgtb r3, [r0, #-1]! - strgeb r4, [r0, #-1]! + strbgt r3, [r0, #-1]! + strbge r4, [r0, #-1]! subs r2, r2, ip strb lr, [r0, #-1]! blt 8b diff --git a/firmware/asm/arm/memset.S b/firmware/asm/arm/memset.S index 64cd95cc9e..d727f2a5ec 100644 --- a/firmware/asm/arm/memset.S +++ b/firmware/asm/arm/memset.S @@ -34,8 +34,8 @@ 1: cmp r2, #4 @ 1 do we have enough blt 5f @ 1 bytes to align with? cmp r3, #2 @ 1 - strgtb r1, [r0, #-1]! @ 1 - strgeb r1, [r0, #-1]! @ 1 + strbgt r1, [r0, #-1]! @ 1 + strbge r1, [r0, #-1]! @ 1 strb r1, [r0, #-1]! @ 1 sub r2, r2, r3 @ 1 r2 = r2 - r3 b 2f @@ -65,24 +65,24 @@ memset: mov lr, r1 3: subs r2, r2, #64 - stmgedb r0!, {r1, r3, ip, lr} @ 64 bytes at a time. - stmgedb r0!, {r1, r3, ip, lr} - stmgedb r0!, {r1, r3, ip, lr} - stmgedb r0!, {r1, r3, ip, lr} + stmdbge r0!, {r1, r3, ip, lr} @ 64 bytes at a time. + stmdbge r0!, {r1, r3, ip, lr} + stmdbge r0!, {r1, r3, ip, lr} + stmdbge r0!, {r1, r3, ip, lr} bgt 3b ldrpc cond=eq @ Now <64 bytes to go. /* * No need to correct the count; we're only testing bits from now on */ tst r2, #32 - stmnedb r0!, {r1, r3, ip, lr} - stmnedb r0!, {r1, r3, ip, lr} + stmdbne r0!, {r1, r3, ip, lr} + stmdbne r0!, {r1, r3, ip, lr} tst r2, #16 - stmnedb r0!, {r1, r3, ip, lr} + stmdbne r0!, {r1, r3, ip, lr} ldr lr, [sp], #4 5: tst r2, #8 - stmnedb r0!, {r1, r3} + stmdbne r0!, {r1, r3} tst r2, #4 strne r1, [r0, #-4]! /* @@ -90,10 +90,10 @@ memset: * may have an unaligned pointer as well. */ 6: tst r2, #2 - strneb r1, [r0, #-1]! - strneb r1, [r0, #-1]! + strbne r1, [r0, #-1]! + strbne r1, [r0, #-1]! tst r2, #1 - strneb r1, [r0, #-1]! + strbne r1, [r0, #-1]! bx lr .end: .size memset,.end-memset diff --git a/firmware/asm/arm/memset16.S b/firmware/asm/arm/memset16.S index 5c787b1bed..226eac39e1 100644 --- a/firmware/asm/arm/memset16.S +++ b/firmware/asm/arm/memset16.S @@ -35,7 +35,7 @@ memset16: tst r0, #2 @ unaligned? cmpne r2, #0 - strneh r1, [r0], #2 @ store one halfword to align + strhne r1, [r0], #2 @ store one halfword to align subne r2, r2, #1 /* @@ -54,29 +54,29 @@ memset16: mov lr, r1 2: subs r2, r2, #32 - stmgeia r0!, {r1, r3, ip, lr} @ 64 bytes at a time. - stmgeia r0!, {r1, r3, ip, lr} - stmgeia r0!, {r1, r3, ip, lr} - stmgeia r0!, {r1, r3, ip, lr} + stmiage r0!, {r1, r3, ip, lr} @ 64 bytes at a time. + stmiage r0!, {r1, r3, ip, lr} + stmiage r0!, {r1, r3, ip, lr} + stmiage r0!, {r1, r3, ip, lr} bgt 2b ldrpc cond=eq @ Now <64 bytes to go. /* * No need to correct the count; we're only testing bits from now on */ tst r2, #16 - stmneia r0!, {r1, r3, ip, lr} - stmneia r0!, {r1, r3, ip, lr} + stmiane r0!, {r1, r3, ip, lr} + stmiane r0!, {r1, r3, ip, lr} tst r2, #8 - stmneia r0!, {r1, r3, ip, lr} + stmiane r0!, {r1, r3, ip, lr} ldr lr, [sp], #4 4: tst r2, #4 - stmneia r0!, {r1, r3} + stmiane r0!, {r1, r3} tst r2, #2 strne r1, [r0], #4 tst r2, #1 - strneh r1, [r0], #2 + strhne r1, [r0], #2 bx lr .end: .size memset16,.end-memset16 diff --git a/firmware/asm/arm/thread.c b/firmware/asm/arm/thread.c index cf685526e3..bd9f950616 100644 --- a/firmware/asm/arm/thread.c +++ b/firmware/asm/arm/thread.c @@ -73,15 +73,16 @@ static inline void store_context(void* addr) static inline void load_context(const void* addr) { asm volatile( + ".syntax unified \n" "ldr r0, [%0, #40] \n" /* Load start pointer */ "cmp r0, #0 \n" /* Check for NULL */ /* If not already running, jump to start */ #if ARM_ARCH == 4 && defined(USE_THUMB) - "ldmneia %0, { r0, r12 } \n" + "ldmiane %0, { r0, r12 } \n" "bxne r12 \n" #else - "ldmneia %0, { r0, pc } \n" + "ldmiane %0, { r0, pc } \n" #endif "ldmia %0, { r4-r11, sp, lr } \n" /* Load regs r4 to r14 from context */ diff --git a/firmware/export/config.h b/firmware/export/config.h index 0882cad61c..19ee03b4c7 100644 --- a/firmware/export/config.h +++ b/firmware/export/config.h @@ -1006,13 +1006,14 @@ Lyre prototype 1 */ #endif #if defined(CPU_ARM) && defined(__ASSEMBLER__) +.syntax unified /* ARMv4T doesn't switch the T bit when popping pc directly, we must use BX */ .macro ldmpc cond="", order="ia", regs #if ARM_ARCH == 4 && defined(USE_THUMB) - ldm\cond\order sp!, { \regs, lr } + ldm\order\cond sp!, { \regs, lr } bx\cond lr #else - ldm\cond\order sp!, { \regs, pc } + ldm\order\cond sp!, { \regs, pc } #endif .endm .macro ldrpc cond="" diff --git a/firmware/target/arm/ata-as-arm.S b/firmware/target/arm/ata-as-arm.S index 101bc4dcc1..16c2928bf1 100644 --- a/firmware/target/arm/ata-as-arm.S +++ b/firmware/target/arm/ata-as-arm.S @@ -139,9 +139,9 @@ copy_read_sectors: .r_end2_u: tst r1, #1 /* one halfword left? */ - ldrneh r4, [r2] + ldrhne r4, [r2] orrne r3, r3, r4, lsl #8 - strneh r3, [r0], #2 + strhne r3, [r0], #2 movne r3, r4, lsr #8 strb r3, [r0], #1 /* store final byte */ @@ -151,8 +151,8 @@ copy_read_sectors: /* 16-bit aligned */ .r_aligned: tst r0, #2 /* 32 bit aligned? */ - ldrneh r3, [r2] /* no: read first halfword */ - strneh r3, [r0], #2 /* store */ + ldrhne r3, [r2] /* no: read first halfword */ + strhne r3, [r0], #2 /* store */ subne r1, r1, #1 /* one halfword taken */ sub r1, r1, #8 /* adjust for zero-check and doing 8 halfwords/loop */ @@ -186,14 +186,14 @@ copy_read_sectors: .r_end4_a: tst r1, #2 /* 2 or more halfwords left? */ - ldrneh r3, [r2] - ldrneh r4, [r2] + ldrhne r3, [r2] + ldrhne r4, [r2] orrne r3, r3, r4, lsl #16 strne r3, [r0], #4 tst r1, #1 /* one halfword left? */ - ldrneh r3, [r2] - strneh r3, [r0], #2 + ldrhne r3, [r2] + strhne r3, [r0], #2 ldmpc regs=r4-r5 @@ -291,9 +291,9 @@ copy_write_sectors: .w_end2_u: tst r1, #1 /* one halfword left? */ - ldrneh r4, [r0], #2 + ldrhne r4, [r0], #2 orrne r3, r3, r4, lsl #8 - strneh r3, [r2] + strhne r3, [r2] movne r3, r3, lsr #16 ldrb r4, [r0], #1 /* load final byte */ @@ -305,8 +305,8 @@ copy_write_sectors: /* 16-bit aligned */ .w_aligned: tst r0, #2 /* 32 bit aligned? */ - ldrneh r3, [r0], #2 /* no: load first halfword */ - strneh r3, [r2] /* write */ + ldrhne r3, [r0], #2 /* no: load first halfword */ + strhne r3, [r2] /* write */ subne r1, r1, #1 /* one halfword taken */ sub r1, r1, #8 /* adjust for zero-check and doing 8 halfwords/loop */ @@ -341,13 +341,13 @@ copy_write_sectors: tst r1, #2 /* 2 or more halfwords left? */ ldrne r3, [r0], #4 - strneh r3, [r2] + strhne r3, [r2] movne r3, r3, lsr #16 - strneh r3, [r2] + strhne r3, [r2] tst r1, #1 /* one halfword left? */ - ldrneh r3, [r0], #2 - strneh r3, [r2] + ldrhne r3, [r0], #2 + strhne r3, [r2] ldmpc regs=r4-r5 diff --git a/firmware/target/arm/ipod/video/lcd-as-video.S b/firmware/target/arm/ipod/video/lcd-as-video.S index 47155b8c75..7d6caef448 100644 --- a/firmware/target/arm/ipod/video/lcd-as-video.S +++ b/firmware/target/arm/ipod/video/lcd-as-video.S @@ -40,24 +40,24 @@ lcd_write_data: /* r1 = pixel count, must be even */ subs r1, r1, #16 .loop16: - ldmgeia r0!, {r2-r3} - stmgeia lr, {r2-r3} - ldmgeia r0!, {r2-r3} - stmgeia lr, {r2-r3} - ldmgeia r0!, {r2-r3} - stmgeia lr, {r2-r3} - ldmgeia r0!, {r2-r3} - stmgeia lr, {r2-r3} - subges r1, r1, #16 + ldmiage r0!, {r2-r3} + stmiage lr, {r2-r3} + ldmiage r0!, {r2-r3} + stmiage lr, {r2-r3} + ldmiage r0!, {r2-r3} + stmiage lr, {r2-r3} + ldmiage r0!, {r2-r3} + stmiage lr, {r2-r3} + subsge r1, r1, #16 bge .loop16 /* no need to correct the count, we're just checking bits from now */ tst r1, #8 - ldmneia r0!, {r2-r4, r12} - stmneia lr, {r2-r4, r12} + ldmiane r0!, {r2-r4, r12} + stmiane lr, {r2-r4, r12} tst r1, #4 - ldmneia r0!, {r2-r3} - stmneia lr, {r2-r3} + ldmiane r0!, {r2-r3} + stmiane lr, {r2-r3} tst r1, #2 ldrne r3, [r0], #4 strne r3, [lr] diff --git a/firmware/target/arm/pcm-telechips.c b/firmware/target/arm/pcm-telechips.c index 336b5626ca..747765d8fb 100644 --- a/firmware/target/arm/pcm-telechips.c +++ b/firmware/target/arm/pcm-telechips.c @@ -218,6 +218,7 @@ void fiq_handler(void) * r0-r3 and r12 is a working register. */ asm volatile ( + ".syntax unified \n" "sub lr, lr, #4 \n" "stmfd sp!, { r0-r3, lr } \n" /* stack scratch regs and lr */ "mov r14, #0 \n" /* Was the callback called? */ @@ -251,7 +252,7 @@ void fiq_handler(void) "stmia r11, { r8-r9 } \n" /* save p and size */ "cmp r14, #0 \n" /* Callback called? */ - "ldmeqfd sp!, { r0-r3, pc }^ \n" /* no? -> exit */ + "ldmfdeq sp!, { r0-r3, pc }^ \n" /* no? -> exit */ "ldr r1, =pcm_play_status_callback \n" "ldr r1, [r1] \n" @@ -268,7 +269,7 @@ void fiq_handler(void) "mov lr, pc \n" "ldr pc, =pcm_play_dma_complete_callback \n" "cmp r0, #0 \n" /* any more to play? */ - "ldmneia r11, { r8-r9 } \n" /* load new p and size */ + "ldmiane r11, { r8-r9 } \n" /* load new p and size */ "cmpne r9, #0x0f \n" /* did we actually get enough data? */ "bhi .fill_fifo \n" /* not stop and enough? refill */ "ldmfd sp!, { r0-r3, pc }^ \n" /* exit */ diff --git a/firmware/target/arm/pp/pcm-pp.c b/firmware/target/arm/pp/pcm-pp.c index 0d61eb44ff..fd798f0506 100644 --- a/firmware/target/arm/pp/pcm-pp.c +++ b/firmware/target/arm/pp/pcm-pp.c @@ -327,6 +327,7 @@ void fiq_playback(void) */ asm volatile ( /* No external calls */ + ".syntax unified \n" "sub lr, lr, #4 \n" /* Prepare return address */ "stmfd sp!, { lr } \n" /* stack lr so we can use it */ "ldr r12, =0xcf001040 \n" /* Some magic from iPodLinux ... */ @@ -349,8 +350,8 @@ void fiq_playback(void) "bhi 0b \n" /* ... yes, continue */ "cmp r9, #0 \n" /* either FIFO full or size empty? */ - "stmneia r11, { r8-r9 } \n" /* save p and size, if not empty */ - "ldmnefd sp!, { pc }^ \n" /* RFE if not empty */ + "stmiane r11, { r8-r9 } \n" /* save p and size, if not empty */ + "ldmfdne sp!, { pc }^ \n" /* RFE if not empty */ /* Making external calls */ "1: \n" @@ -363,7 +364,7 @@ void fiq_playback(void) "mov lr, pc \n" /* long call (not in same section) */ "bx r3 \n" "cmp r0, #0 \n" /* more data? */ - "ldmeqfd sp!, { r0-r3, pc }^ \n" /* no? -> exit */ + "ldmfdeq sp!, { r0-r3, pc }^ \n" /* no? -> exit */ "ldr r14, [r10, #0x1c] \n" /* read IISFIFO_CFG to check FIFO status */ "ands r14, r14, #(0xe<<23) \n" /* r14 = (IIS_TX_FREE_COUNT & ~1) << 23 */ diff --git a/lib/arm_support/support-arm.S b/lib/arm_support/support-arm.S index f99d086b0b..3af0c70975 100644 --- a/lib/arm_support/support-arm.S +++ b/lib/arm_support/support-arm.S @@ -227,7 +227,7 @@ /* Test whether divisor is 2^N */ cmp \inv, #1<<31 /* Load approximate reciprocal */ - ldrhib \inv, [\neg, #.L_udiv_est_table-.-64] + ldrbhi \inv, [\neg, #.L_udiv_est_table-.-64] bls 20f subs \bits, \bits, #7 rsb \neg, \divisor, #0 diff --git a/lib/rbcodec/codecs/demac/libdemac/udiv32_arm.S b/lib/rbcodec/codecs/demac/libdemac/udiv32_arm.S index 7b851659bd..1d19160a91 100644 --- a/lib/rbcodec/codecs/demac/libdemac/udiv32_arm.S +++ b/lib/rbcodec/codecs/demac/libdemac/udiv32_arm.S @@ -225,7 +225,7 @@ udiv32_arm: mov \inv, \divisor, lsl \bits add \neg, pc, \inv, lsr #25 cmp \inv, #1<<31 - ldrhib \inv, [\neg, #.L_udiv_est_table-.-64] + ldrbhi \inv, [\neg, #.L_udiv_est_table-.-64] bls 20f subs \bits, \bits, #7 rsb \neg, \divisor, #0 diff --git a/lib/rbcodec/codecs/demac/libdemac/vector_math16_armv6.h b/lib/rbcodec/codecs/demac/libdemac/vector_math16_armv6.h index 8d27331b62..1da090efbb 100644 --- a/lib/rbcodec/codecs/demac/libdemac/vector_math16_armv6.h +++ b/lib/rbcodec/codecs/demac/libdemac/vector_math16_armv6.h @@ -45,6 +45,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2) #endif asm volatile ( + ".syntax unified \n" #if ORDER > 32 "mov %[res], #0 \n" #endif @@ -117,7 +118,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2) "smladx %[res], r1, r2, %[res] \n" #if ORDER > 32 "subs %[cnt], %[cnt], #1 \n" - "ldmneia %[f2]!, {r2,r4} \n" + "ldmiane %[f2]!, {r2,r4} \n" "sadd16 r0, r0, r7 \n" "sadd16 r1, r1, r5 \n" "strd r0, [%[v1]], #8 \n" @@ -172,8 +173,8 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2) "smlad %[res], r3, r5, %[res] \n" #if ORDER > 32 "subs %[cnt], %[cnt], #1 \n" - "ldrned r4, [%[f2]], #8 \n" - "ldrned r0, [%[v1], #8] \n" + "ldrdne r4, [%[f2]], #8 \n" + "ldrdne r0, [%[v1], #8] \n" "sadd16 r2, r2, r6 \n" "sadd16 r3, r3, r7 \n" "strd r2, [%[v1]], #8 \n" @@ -214,6 +215,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2) #endif asm volatile ( + ".syntax unified \n" #if ORDER > 32 "mov %[res], #0 \n" #endif @@ -286,7 +288,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2) "smladx %[res], r1, r2, %[res] \n" #if ORDER > 32 "subs %[cnt], %[cnt], #1 \n" - "ldmneia %[f2]!, {r2,r4} \n" + "ldmiane %[f2]!, {r2,r4} \n" "ssub16 r0, r0, r7 \n" "ssub16 r1, r1, r5 \n" "strd r0, [%[v1]], #8 \n" @@ -341,8 +343,8 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2) "smlad %[res], r3, r5, %[res] \n" #if ORDER > 32 "subs %[cnt], %[cnt], #1 \n" - "ldrned r4, [%[f2]], #8 \n" - "ldrned r0, [%[v1], #8] \n" + "ldrdne r4, [%[f2]], #8 \n" + "ldrdne r0, [%[v1], #8] \n" "ssub16 r2, r2, r6 \n" "ssub16 r3, r3, r7 \n" "strd r2, [%[v1]], #8 \n" @@ -381,6 +383,7 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) #endif asm volatile ( + ".syntax unified \n" #if ORDER > 32 "mov %[res], #0 \n" #endif @@ -421,10 +424,10 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) "pkhtb r1, r7, r4 \n" #if ORDER > 32 "subs %[cnt], %[cnt], #1 \n" - "ldrned r6, [%[v2]], #8 \n" + "ldrdne r6, [%[v2]], #8 \n" "smladx %[res], r2, r1, %[res] \n" "pkhtb r2, r4, r5 \n" - "ldrned r0, [%[v1]], #8 \n" + "ldrdne r0, [%[v1]], #8 \n" "smladx %[res], r3, r2, %[res] \n" "bne 1b \n" #else @@ -461,9 +464,9 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) "ldrd r4, [%[v2]], #8 \n" "smlad %[res], r1, r6, %[res] \n" "subs %[cnt], %[cnt], #1 \n" - "ldrned r0, [%[v1]], #8 \n" + "ldrdne r0, [%[v1]], #8 \n" "smlad %[res], r2, r7, %[res] \n" - "ldrned r6, [%[v2]], #8 \n" + "ldrdne r6, [%[v2]], #8 \n" "smlad %[res], r3, r4, %[res] \n" "bne 1b \n" #else diff --git a/lib/rbcodec/codecs/libtta/filter_arm.S b/lib/rbcodec/codecs/libtta/filter_arm.S index f3959b83ca..10f1491796 100644 --- a/lib/rbcodec/codecs/libtta/filter_arm.S +++ b/lib/rbcodec/codecs/libtta/filter_arm.S @@ -165,8 +165,8 @@ hybrid_filter: sub r10, r11, r10 @ set to the memory: *pA, *(pA-1), *(pA-2), *(pA-3), *pM, *(pM-1), *(pM-2), *(pM-3) - stmneda r2, {r10, r11, r12, lr} - stmneda r3, {r5, r6, r7, r8} + stmdane r2, {r10, r11, r12, lr} + stmdane r3, {r5, r6, r7, r8} ldmpc cond=ne regs=r4-r12 @ hybrid_filter end (when fs->index != 0) .hf_memshl: diff --git a/lib/rbcodec/dsp/dsp_arm.S b/lib/rbcodec/dsp/dsp_arm.S index 16394b8690..864abee4b6 100644 --- a/lib/rbcodec/dsp/dsp_arm.S +++ b/lib/rbcodec/dsp/dsp_arm.S @@ -323,7 +323,7 @@ resample_hermite: add r6, r6, r0, lsl #2 @ r6 = &s[pos] cmp r0, #3 @ pos >= 3? history not needed - ldmgedb r6, { r1-r3 } @ x3..x1 = s[pos-3]..s[pos-1] + ldmdbge r6, { r1-r3 } @ x3..x1 = s[pos-3]..s[pos-1] bge .hrs_loadhist_done @ add r10, r0, r0, lsl #1 @ branch pc + pos*12 add pc, pc, r10, lsl #2 @ @@ -496,7 +496,7 @@ resample_hermite: ldmfd sp!, { r10, r12 } @ recover ch, h subs r10, r10, #1 @ --ch stmia r12!, { r1-r3 } @ h[0..2] = x3..x1 - ldmgtia sp, { r0-r2 } @ load data, src, dst + ldmiagt sp, { r0-r2 } @ load data, src, dst bgt .hrs_channel_loop ldmfd sp!, { r1-r3 } @ pop data, src, dst @@ -614,7 +614,7 @@ filter_process: ldr r0, [sp] @ r0 = history[channels-ch-1] subs r3, r3, #1 @ all channels processed? stmia r0!, { r9-r12 } @ save back history, history++ - ldmhsib sp, { r1-r2 } @ r1 = buf, r2 = count + ldmibhs sp, { r1-r2 } @ r1 = buf, r2 = count strhs r3, [sp, #12] @ store ch strhs r0, [sp] @ store history[channels-ch-1] bhs .fp_channelloop diff --git a/lib/rbcodec/dsp/dsp_arm_v6.S b/lib/rbcodec/dsp/dsp_arm_v6.S index a36760f744..aa27ec90f6 100644 --- a/lib/rbcodec/dsp/dsp_arm_v6.S +++ b/lib/rbcodec/dsp/dsp_arm_v6.S @@ -18,6 +18,7 @@ * KIND, either express or implied. * ****************************************************************************/ + #include "rbcodecconfig.h" /**************************************************************************** * void sample_output_mono(struct sample_io_data *this, @@ -56,7 +57,7 @@ sample_output_mono: stmia r3!, { r12, r14 } @ store So0, So1 bgt 1b @ @ - ldmltfd sp!, { r4, pc } @ if count was even, we're done + ldmfdlt sp!, { r4, pc } @ if count was even, we're done @ 2: @ ldr r12, [r2] @ round, scale, saturate @@ -113,7 +114,7 @@ sample_output_stereo: stmia r3!, { r6, r7 } @ store So0, So1 bgt 1b @ @ - ldmltfd sp!, { r4-r7, pc } @ if count was even, we're done + ldmfdlt sp!, { r4-r7, pc } @ if count was even, we're done @ 2: @ ldr r6, [r2] @ r6 = Li diff --git a/lib/unwarminder/safe_read.S b/lib/unwarminder/safe_read.S index 1969c7e0c6..ce4913870c 100644 --- a/lib/unwarminder/safe_read.S +++ b/lib/unwarminder/safe_read.S @@ -45,7 +45,7 @@ safe_read8_faulty_addr: @ if(value != NULL) cmp r1, #0 @ *value = r0 - strneb r0, [r1] + strbne r0, [r1] @ return true; mov r0, #1 bx lr @@ -72,7 +72,7 @@ safe_read16_faulty_addr: @ if(value != NULL) cmp r1, #0 @ *value = r0 - strneh r0, [r1] + strhne r0, [r1] @ return true; mov r0, #1 bx lr -- cgit v1.2.3