diff options
author | Michael Sevakis <jethead71@rockbox.org> | 2017-09-07 15:41:52 -0400 |
---|---|---|
committer | Michael Sevakis <jethead71@rockbox.org> | 2017-09-07 15:45:55 -0400 |
commit | c6d5cd74a866901eb8f6e69e642f617e5810e0c6 (patch) | |
tree | bfa16a09051e0d7f0298575d3f5b34bcf33d9cf3 | |
parent | 28591f2e92cff7edb8da4339b61b15305419863a (diff) | |
download | rockbox-c6d5cd74a866901eb8f6e69e642f617e5810e0c6.tar.gz rockbox-c6d5cd74a866901eb8f6e69e642f617e5810e0c6.zip |
ARM support: provide compiler a better popcount function
Just the 32-bit one for now. The default uses lookup tables and is
ungainly and bloated.
Change-Id: I4a2eb31defb1f4d6f6853b65fe6dacc380d6ffc0
-rw-r--r-- | lib/arm_support/support-arm.S | 31 |
1 files changed, 31 insertions, 0 deletions
diff --git a/lib/arm_support/support-arm.S b/lib/arm_support/support-arm.S index 6141a3f158..df49dc49e8 100644 --- a/lib/arm_support/support-arm.S +++ b/lib/arm_support/support-arm.S | |||
@@ -701,3 +701,34 @@ __aeabi_idivmod: | |||
701 | .byte 0x92, 0x90, 0x8f, 0x8e, 0x8d, 0x8c, 0x8a, 0x89 | 701 | .byte 0x92, 0x90, 0x8f, 0x8e, 0x8d, 0x8c, 0x8a, 0x89 |
702 | .byte 0x88, 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81 | 702 | .byte 0x88, 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81 |
703 | #endif | 703 | #endif |
704 | |||
/*
 * int __popcountsi2(unsigned int x)
 *
 * Population count (number of set bits) of the 32-bit value in r0, using
 * the branch-free parallel bit-summing method: 2-bit partial sums, then
 * 4-bit sums, then 8-bit sums, then a horizontal add of the four bytes.
 *
 * In:       r0 = x
 * Out:      r0 = number of set bits in x (0..32)
 * Clobbers: r1, r2
 */
    .section    .text.__popcountsi2, "ax", %progbits
    .global     __popcountsi2
    .type       __popcountsi2, %function
__popcountsi2:
    mov     r1, #0x33                @ r1 = 0x33333333
    orr     r1, r1, r1, lsl #8       @ ...
    orr     r1, r1, r1, lsl #16      @ ...
    eor     r2, r1, r1, lsl #1       @ r2 = 0x33333333 ^ 0x66666666 = 0x55555555
    and     r2, r2, r0, lsr #1       @ r2 = (x >> 1) & 0x55555555
    sub     r0, r0, r2               @ x = x - ((x >> 1) & 0x55555555)
    and     r2, r1, r0               @ r2 = x & 0x33333333
    and     r1, r1, r0, lsr #2       @ r1 = (x >> 2) & 0x33333333
    add     r0, r2, r1               @ x = (x & 0x33333333) + ((x >> 2) & 0x33333333)
    mov     r1, #0x0f                @ r1 = 0x0f0f0f0f
    orr     r1, r1, r1, lsl #8       @ ...
    orr     r1, r1, r1, lsl #16      @ ...
    add     r0, r0, r0, lsr #4       @ x = x + (x >> 4); nibble sums are <= 8, no carry out
    and     r0, r0, r1               @ x = (x + (x >> 4)) & 0x0f0f0f0f
    add     r0, r0, r0, lsr #16      @ x = x + (x >> 16)
    add     r0, r0, r0, lsr #8       @ x = x + (x >> 8)
    and     r0, r0, #0x3f            @ x &= 0x3f (result is at most 32)
    bx      lr                       @ return x
    .size   __popcountsi2, .-__popcountsi2

/*
 * int __popcountdi2(unsigned long long x)
 *
 * Population count of a 64-bit value. libgcc defines this routine on a
 * DImode operand, which is 64 bits wide on 32-bit ARM, so the argument
 * arrives as a register pair (r0:r1). It must not be aliased to
 * __popcountsi2: that routine overwrites r1 with a mask on entry and
 * would silently ignore half of the operand.
 *
 * Both words are reduced to 4-bit partial sums independently, then merged
 * and finished with a single byte-sum pass.
 *
 * In:       r0, r1 = the two 32-bit halves of x (order is irrelevant here)
 * Out:      r0 = number of set bits in x (0..64)
 * Clobbers: r1, r2, r3, r12
 */
    .section    .text.__popcountdi2, "ax", %progbits
    .global     __popcountdi2
    .type       __popcountdi2, %function
__popcountdi2:
    mov     r12, #0x33               @ r12 = 0x33333333
    orr     r12, r12, r12, lsl #8    @ ...
    orr     r12, r12, r12, lsl #16   @ ...
    eor     r3, r12, r12, lsl #1     @ r3 = 0x55555555
    and     r2, r3, r0, lsr #1       @ r2 = (a >> 1) & 0x55555555
    sub     r0, r0, r2               @ a = 2-bit sums of first word
    and     r2, r3, r1, lsr #1       @ r2 = (b >> 1) & 0x55555555
    sub     r1, r1, r2               @ b = 2-bit sums of second word
    and     r2, r12, r0              @ r2 = a & 0x33333333
    and     r0, r12, r0, lsr #2      @ r0 = (a >> 2) & 0x33333333
    add     r0, r0, r2               @ a = 4-bit sums, each 0..4
    and     r2, r12, r1              @ r2 = b & 0x33333333
    and     r1, r12, r1, lsr #2      @ r1 = (b >> 2) & 0x33333333
    add     r1, r1, r2               @ b = 4-bit sums, each 0..4
    add     r0, r0, r1               @ x = a + b, each nibble 0..8 (still fits)
    mov     r1, #0x0f                @ r1 = 0x0f0f0f0f
    orr     r1, r1, r1, lsl #8       @ ...
    orr     r1, r1, r1, lsl #16      @ ...
    and     r2, r1, r0, lsr #4       @ r2 = (x >> 4) & 0x0f0f0f0f
    and     r0, r0, r1               @ r0 = x & 0x0f0f0f0f
    add     r0, r0, r2               @ x = byte sums, each 0..16; masked first
                                     @ because 8 + 8 would overflow a nibble
    add     r0, r0, r0, lsr #16      @ x = x + (x >> 16)
    add     r0, r0, r0, lsr #8       @ x = x + (x >> 8)
    and     r0, r0, #0x7f            @ x &= 0x7f (result is at most 64)
    bx      lr                       @ return x
    .size   __popcountdi2, .-__popcountdi2