From cf333a61c7861361b4025cb0f67c8f75b0b07eef Mon Sep 17 00:00:00 2001
From: Thomas Martitz <kugel@rockbox.org>
Date: Tue, 17 Jan 2012 07:36:08 +0100
Subject: Move support-arm.S to separate library.

Core, codecs and plugins link it separately so this gets rid of SOURCES trickery.
Don't build it for hosted targets.

Change-Id: If15ef90e93cd218a4352ae8e89eea95d3122452f
---
 apps/codecs/codecs.make                   |   2 +-
 apps/codecs/lib/SOURCES                   |   4 -
 apps/plugins/imageviewer/imageviewer.make |   2 +-
 apps/plugins/lib/SOURCES                  |   1 -
 apps/plugins/plugins.make                 |   4 +-
 firmware/SOURCES                          |   1 -
 firmware/target/arm/support-arm.S         | 703 ------------------------------
 lib/arm_support/arm_support.make          |  17 +
 lib/arm_support/support-arm.S             | 703 ++++++++++++++++++++++++++++++
 tools/root.make                           |  26 +-
 10 files changed, 743 insertions(+), 720 deletions(-)
 delete mode 100644 firmware/target/arm/support-arm.S
 create mode 100644 lib/arm_support/arm_support.make
 create mode 100644 lib/arm_support/support-arm.S

diff --git a/apps/codecs/codecs.make b/apps/codecs/codecs.make
index d3525791cd..e18b92c1f0 100644
--- a/apps/codecs/codecs.make
+++ b/apps/codecs/codecs.make
@@ -201,7 +201,7 @@ $(CODECDIR)/%-pre.map: $(CODEC_CRT0) $(CODECLINK_LDS) $(CODECDIR)/%.o $(CODECLIB
 		$(CODECLIB) \
 		-lgcc $(subst .map,-pre.map,$(CODECLDFLAGS))
 
-$(CODECDIR)/%.codec: $(CODECDIR)/%.o $(LIBSETJMP)
+$(CODECDIR)/%.codec: $(CODECDIR)/%.o $(LIBSETJMP) $(LIBARMSUPPORT)
 	$(call PRINTS,LD $(@F))$(CC) $(CODECFLAGS) -o $(CODECDIR)/$*.elf \
 		$(filter %.o, $^) \
 		$(filter %.a, $+) \
diff --git a/apps/codecs/lib/SOURCES b/apps/codecs/lib/SOURCES
index cd38dc1545..257dcb5838 100644
--- a/apps/codecs/lib/SOURCES
+++ b/apps/codecs/lib/SOURCES
@@ -7,10 +7,6 @@ mdct_lookup.c
 fft-ffmpeg.c
 mdct.c
 
-#ifdef CPU_ARM
-../../../firmware/target/arm/support-arm.S
-#endif
-
 #elif (CONFIG_PLATFORM & PLATFORM_HOSTED) && defined(__APPLE__)
 osx.dummy.c
 #endif
diff --git a/apps/plugins/imageviewer/imageviewer.make b/apps/plugins/imageviewer/imageviewer.make
index d06bbfd571..8f933e3046 100644
--- a/apps/plugins/imageviewer/imageviewer.make
+++ b/apps/plugins/imageviewer/imageviewer.make
@@ -47,7 +47,7 @@ else
 endif
 
 # rule to create reference map for image decoder
-$(IMGVBUILDDIR)/%.refmap: $(APPSDIR)/plugin.h $(IMGVSRCDIR)/imageviewer.h $(PLUGINLINK_LDS) $(PLUGINLIB) $(PLUGINBITMAPLIB)
+$(IMGVBUILDDIR)/%.refmap: $(APPSDIR)/plugin.h $(IMGVSRCDIR)/imageviewer.h $(PLUGINLINK_LDS) $(PLUGINLIB) $(LIBARMSUPPORT) $(PLUGINBITMAPLIB)
 	$(call PRINTS,LD $(@F))$(CC) $(IMGDECFLAGS) -o /dev/null \
 		$(filter %.o, $^) \
 		$(filter %.a, $+) \
diff --git a/apps/plugins/lib/SOURCES b/apps/plugins/lib/SOURCES
index 2c0fc2a611..4b33901088 100644
--- a/apps/plugins/lib/SOURCES
+++ b/apps/plugins/lib/SOURCES
@@ -41,7 +41,6 @@ playergfx.c
 #ifdef HAVE_LCD_BITMAP
 
 #ifdef CPU_ARM
-../../../firmware/target/arm/support-arm.S
 pluginlib_jpeg_idct_arm.S
 #endif
 
diff --git a/apps/plugins/plugins.make b/apps/plugins/plugins.make
index fca37aa763..1bf78d82c1 100644
--- a/apps/plugins/plugins.make
+++ b/apps/plugins/plugins.make
@@ -65,7 +65,7 @@ PLUGINFLAGS = -I$(APPSDIR)/plugins -DPLUGIN $(CFLAGS)
 $(ROCKS1): $(BUILDDIR)/%.rock: $(BUILDDIR)/%.o
 
 # dependency for all plugins
-$(ROCKS): $(APPSDIR)/plugin.h $(PLUGINLINK_LDS) $(PLUGINLIB) $(PLUGINBITMAPLIB) $(PLUGIN_CRT0) $(LIBSETJMP)
+$(ROCKS): $(APPSDIR)/plugin.h $(PLUGINLINK_LDS) $(PLUGINLIB) $(PLUGINBITMAPLIB) $(PLUGIN_CRT0) $(LIBSETJMP) $(LIBARMSUPPORT)
 
 $(PLUGINLIB): $(PLUGINLIB_OBJ)
 	$(SILENT)$(shell rm -f $@)
@@ -129,7 +129,7 @@ endif
 $(BUILDDIR)/apps/plugins/%.lua: $(ROOTDIR)/apps/plugins/%.lua
 	$(call PRINTS,CP $(subst $(ROOTDIR)/,,$<))cp $< $(BUILDDIR)/apps/plugins/
 
-$(BUILDDIR)/%.refmap: $(APPSDIR)/plugin.h $(OVERLAYREF_LDS) $(PLUGINLIB) $(PLUGINBITMAPLIB) $(LIBSETJMP) $(PLUGIN_CRT0)
+$(BUILDDIR)/%.refmap: $(APPSDIR)/plugin.h $(OVERLAYREF_LDS) $(PLUGINLIB) $(PLUGINBITMAPLIB) $(LIBSETJMP) $(LIBARMSUPPORT) $(PLUGIN_CRT0)
 	$(call PRINTS,LD $(@F))$(CC) $(PLUGINFLAGS) -o /dev/null \
 		$(filter %.o, $^) \
 		$(filter %.a, $+) \
diff --git a/firmware/SOURCES b/firmware/SOURCES
index 438c9e6526..f2036734a5 100644
--- a/firmware/SOURCES
+++ b/firmware/SOURCES
@@ -454,7 +454,6 @@ target/coldfire/ata-as-coldfire.S
 
 #elif defined(CPU_PP) || defined(CPU_ARM)
 /* CPU_PP => CPU_ARM, CPU_ARM !=> CPU_PP */
-target/arm/support-arm.S
 target/arm/memcpy-arm.S
 target/arm/memmove-arm.S
 
diff --git a/firmware/target/arm/support-arm.S b/firmware/target/arm/support-arm.S
deleted file mode 100644
index 6141a3f158..0000000000
--- a/firmware/target/arm/support-arm.S
+++ /dev/null
@@ -1,703 +0,0 @@
-/***************************************************************************
- *             __________               __   ___.
- *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
- *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
- *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
- *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
- *                     \/            \/     \/    \/            \/
- * $Id$
- *
- * Copyright (C) 2008 by Jens Arnold
- * Copyright (C) 2009 by Andrew Mahone
- *
- * Optimised replacements for libgcc functions
- *
- * Based on: libgcc routines for ARM cpu, additional algorithms from ARM System
- *           Developer's Guide
- * Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
- * Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
- * Free Software Foundation, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
- * KIND, either express or implied.
- *
- ****************************************************************************/
-
-#include <config.h>
-
-.macro ARM_SDIV32_PRE numerator, divisor, sign
-    /* sign[31] = divisor sign */
-    ands    \sign, \divisor, #1<<31
-    rsbeq   \divisor, \divisor, #0
-    /* sign[31] = result sign, sign[0:30], C = numerator sign */
-    eors    \sign, \sign, \numerator, asr #32
-    rsbcs   \numerator, \numerator, #0
-.endm
-
-.macro ARM_SDIV32_POST quotient, remainder, sign
-    movs    \sign, \sign, lsl #1
-.ifnc "", "\quotient"
-    rsbcs   \quotient, \quotient, #0
-.endif
-.ifnc "", "\remainder"
-    rsbmi   \remainder, \remainder, #0
-.endif
-.endm
-
-#if ARM_ARCH < 5
-.macro ARMV4_UDIV32_BODY numerator, divisor, quotient, remainder, tmp, bits, div0label, return
-.ifnc "", "\div0label"
-    rsbs    \divisor, \divisor, #0
-    beq     \div0label
-.else
-    rsb     \divisor, \divisor, #0
-.endif
-    /* This SWAR divider requires a numerator less than 1<<31, because it must
-       be able to shift the remainder left at each step without shifting out
-       topmost bit. Since a shift might be needed for the aligned remainder to
-       exceed the divisor, the topmost bit must be unset at the start to avoid
-       this overflow case. The original numerator is saved so that the result
-       can be corrected after the reduced division completes. */
-    cmn     \numerator, \divisor
-.ifc "", "\quotient"
-.ifc "\numerator", "\remainder"
-.if \return
-    bxcc    lr
-.else
-    b 99f
-.endif
-.else
-    bcc     20f
-.endif
-.else
-    bcc     20f
-.endif
-    movs    \tmp, \numerator
-    movmi   \numerator, \numerator, lsr #1
-    mov     \bits, #30
-.set shift, 16
-.rept 5
-    cmn     \divisor, \numerator, lsr #shift
-    subcs   \bits, \bits, #shift
-    movcs   \divisor, \divisor, lsl #shift
-.set shift, shift >> 1
-.endr
-    adds    \numerator, \numerator, \divisor
-    subcc   \numerator, \numerator, \divisor
-    add     pc, pc, \bits, lsl #3
-    nop
-.rept 30
-    adcs    \numerator, \divisor, \numerator, lsl #1
-    subcc   \numerator, \numerator, \divisor
-.endr
-    adc     \numerator, \numerator, \numerator
-    movs    \tmp, \tmp, asr #1
-    rsb     \bits, \bits, #31
-    bmi     10f
-.ifc "", "\quotient"
-    mov     \remainder, \numerator, lsr \bits
-.else
-.ifc "", "\remainder"
-    mov     \divisor, \numerator, lsr \bits
-    eor     \quotient, \numerator, \divisor, lsl \bits
-.else
-    mov     \remainder, \numerator, lsr \bits
-    eor     \quotient, \numerator, \remainder, lsl \bits
-.endif
-.endif
-.ifne \return
-    bx      lr
-.else
-    b       99f
-.endif
-10:
-    mov     \tmp, \numerator, lsr \bits
-    eor     \numerator, \numerator, \tmp, lsl \bits
-    sub     \bits, \bits, #1
-    adc     \tmp, \tmp, \tmp
-    adds    \tmp, \tmp, \divisor, asr \bits
-.ifnc "", "\quotient"
-    adc     \quotient, \numerator, \numerator
-.endif
-.ifnc "", "\remainder"
-    subcc   \remainder, \tmp, \divisor, asr \bits
-    movcs   \remainder, \tmp
-.endif
-.ifne \return
-    bx      lr
-.else
-    b       99f
-.endif
-20:
-.ifnc "", "\remainder"
-.ifnc "\remainder", "\numerator"
-    mov     \remainder, \numerator
-.endif
-.endif
-.ifnc "", "\quotient"
-    mov   \quotient, #0
-.endif
-.ifne \return
-    bx      lr
-.else
-99:
-.endif
-.endm
-
-.macro ARMV4_SDIV32_BODY numerator, divisor, quotient, remainder, bits, sign, div0label, return
-    /* When this is wrapped for signed division, the wrapper code will handle
-       inverting the divisor, and also the zero divisor test. */
-    ARM_SDIV32_PRE \numerator, \divisor, \sign
-.ifnc "",   "\div0label"
-    tst     \divisor, \divisor
-    beq     \div0label
-.endif
-    /* This SWAR divider requires a numerator less than 1<<31, because it must
-       be able to shift the remainder left at each step without shifting out
-       topmost bit. With signed inputs, whose absolute value may not exceed
-       1<<31,this may be accomplished simply by subtracting the divisor before
-       beginning division, and adding 1 to the quotient. */
-    adds    \numerator, \numerator, \divisor
-    bcc     20f
-    mov     \bits, #30
-.set shift, 16
-.rept 5
-    cmn     \divisor, \numerator, lsr #shift
-    subcs   \bits, \bits, #shift
-    movcs   \divisor, \divisor, lsl #shift
-.set shift, shift >> 1
-.endr
-    adds    \numerator, \numerator, \divisor
-    subcc   \numerator, \numerator, \divisor
-    add     pc, pc, \bits, lsl #3
-    nop
-.rept 30
-    adcs    \numerator, \divisor, \numerator, lsl #1
-    subcc   \numerator, \numerator, \divisor
-.endr
-    rsb     \bits, \bits, #31
-    adc     \numerator, \numerator, \numerator
-.ifc "", "\quotient"
-    mov     \remainder, \numerator, lsr \bits
-.else
-.ifc "", "\remainder"
-    mov     \divisor, \numerator, lsr \bits
-    add     \numerator, \numerator, #1
-    sub     \quotient, \numerator, \divisor, lsl \bits
-.else
-    mov     \remainder, \numerator, lsr \bits
-    add     \numerator, \numerator, #1
-    sub     \quotient, \numerator, \remainder, lsl \bits
-.endif
-.endif
-.ifne \return
-    ARM_SDIV32_POST \quotient, \remainder, \sign
-    bx      lr
-.else
-    b       99f
-.endif
-20:
-.ifnc "", "\remainder"
-    sub     \remainder, \numerator, \divisor
-.endif
-.ifnc "", "\quotient"
-    mov     \quotient, #0
-.endif
-.ifne \return
-    ARM_SDIV32_POST "", \remainder, \sign
-    bx      lr
-.else
-99:
-    ARM_SDIV32_POST \quotient, \remainder, \sign
-.endif
-.endm
-
-#else
-.macro ARMV5_UDIV32_BODY numerator, divisor, quotient, remainder, bits, inv, neg, div0label, return
-    cmp     \numerator, \divisor
-    clz     \bits, \divisor
-    bcc     30f
-    mov     \inv, \divisor, lsl \bits
-    add     \neg, pc, \inv, lsr #25
-    /* Test whether divisor is 2^N */
-    cmp     \inv, #1<<31
-    /* Load approximate reciprocal */
-    ldrhib  \inv, [\neg, #.L_udiv_est_table-.-64]
-    bls     20f
-    subs    \bits, \bits, #7
-    rsb     \neg, \divisor, #0
-    /* Scale approximate reciprocal, or else branch to large-divisor path */
-    movpl   \divisor, \inv, lsl \bits
-    bmi     10f
-    /* Newton-Raphson iteration to improve reciprocal accuracy */
-    mul     \inv, \divisor, \neg
-    smlawt  \divisor, \divisor, \inv, \divisor
-    mul     \inv, \divisor, \neg
-    /* Complete N-R math and produce approximate quotient. Use smmla/smmul on
-       ARMv6. */
-#if ARM_ARCH >= 6
-    tst     \numerator, \numerator
-    smmla   \divisor, \divisor, \inv, \divisor
-    /* Branch to large-numerator handler, or else use smmul if sign bit is not
-       set. This wins on average with random numerators, and should be no
-       slower than using umull for small numerator, even if prediction fails.
-    */
-    bmi     40f
-    smmul   \inv, \numerator, \divisor
-#else
-    /* ARMv5e lacks smmul, so always uses umull. */
-    mov     \bits, #0
-    smlal   \bits, \divisor, \inv, \divisor
-    umull   \bits, \inv, \numerator, \divisor
-#endif
-    /* Calculate remainder and correct result. */
-    add     \numerator, \numerator, \neg
-.ifnc "", "\remainder"
-    mla     \remainder, \inv, \neg, \numerator
-.ifnc "", "\quotient"
-    mov     \quotient, \inv
-    cmn     \remainder, \neg
-    subcs   \remainder, \remainder, \neg
-    addpl   \remainder, \remainder, \neg, lsl #1
-    addcc   \quotient, \quotient, #1
-    addpl   \quotient, \quotient, #2
-.else
-    cmn     \remainder, \neg
-    subcs   \remainder, \remainder, \neg
-    addpl   \remainder, \remainder, \neg, lsl #1
-.endif
-.else
-    mla     \divisor, \inv, \neg, \numerator
-    mov     \quotient, \inv
-    cmn     \divisor, \neg
-    addcc   \quotient, \quotient, #1
-    addpl   \quotient, \quotient, #2
-.endif
-.if \return
-    bx      lr
-.else
-    b       99f
-.endif
-10:
-    /* Very large divisors can be handled without further improving the
-       reciprocal. First the reciprocal must be reduced to ensure that it
-       underestimates the correct value. */
-    rsb     \bits, \bits, #0
-    sub     \inv, \inv, #4
-    mov     \divisor, \inv, lsr \bits
-    /* Calculate approximate quotient and remainder */
-    umull   \bits, \inv, \numerator, \divisor
-    /* Correct quotient and remainder */
-.ifnc "", "\remainder"
-    mla     \remainder, \inv, \neg, \numerator
-.ifnc "", "\quotient"
-    mov     \quotient, \inv
-    cmn     \neg, \remainder, lsr #1
-    addcs   \remainder, \remainder, \neg, lsl #1
-    addcs   \quotient, \quotient, #2
-    cmn     \neg, \remainder
-    addcs   \remainder, \remainder, \neg
-    addcs   \quotient, \quotient, #1
-.else
-    cmn     \neg, \remainder, lsr #1
-    addcs   \remainder, \remainder, \neg, lsl #1
-    cmn     \neg, \remainder
-    addcs   \remainder, \remainder, \neg
-.endif
-.else
-    mla     \divisor, \inv, \neg, \numerator
-    mov     \quotient, \inv
-    cmn     \neg, \divisor, lsr #1
-    addcs   \divisor, \divisor, \neg, lsl #1
-    addcs   \quotient, \quotient, #2
-    cmn     \neg, \divisor
-    addcs   \quotient, \quotient, #1
-.endif
-.if \return
-    bx      lr
-.else
-    b       99f
-.endif
-20:
-    /* Handle division by powers of two by shifting right. Mod is handled
-       by using divisor-1 as a bitmask. */
-.ifnc "", "\remainder"
-.ifnc "", "\div0label"
-    bne     \div0label
-.endif
-.ifnc "", "\quotient"
-    sub     \divisor, \divisor, #1
-    rsb     \bits, \bits, #31
-    and     \remainder, \numerator, \divisor
-    mov     \quotient, \numerator, lsr \bits
-.else
-    sub     \divisor, \divisor, #1
-    and     \remainder, \numerator, \divisor
-.endif
-.else
-    rsb     \bits, \bits, #31
-.ifnc "", "\div0label"
-    bne     \div0label
-.endif
-    mov     \quotient, \numerator, lsr \bits
-.endif
-.if \return
-    bx      lr
-.else
-    b       99f
-.endif
-30:
-    /* Handle numerator < divisor - quotient is zero, remainder is numerator,
-       which must be restored to its original value on ARMv6. */
-.ifnc "", "\remainder"
-    mov     \remainder, \numerator
-.endif
-.ifnc "", "\quotient"
-    mov     \quotient, #0
-.endif
-.if \return
-    bx      lr
-.endif
-#if ARM_ARCH >= 6
-40:
-    /* Handle large (sign bit set) numerators. Works exactly as the ARMv5e code
-       above 10:. */
-    umull   \bits, \inv, \numerator, \divisor
-    add     \numerator, \numerator, \neg
-.ifnc "", "\remainder"
-    mla     \remainder, \inv, \neg, \numerator
-.ifnc "", "\quotient"
-    mla     \remainder, \inv, \neg, \numerator
-    mov     \quotient, \inv
-    cmn     \remainder, \neg
-    subcs   \remainder, \remainder, \neg
-    addpl   \remainder, \remainder, \neg, lsl #1
-    addcc   \quotient, \quotient, #1
-    addpl   \quotient, \quotient, #2
-.else
-    cmn     \remainder, \neg
-    subcs   \remainder, \remainder, \neg
-    addpl   \remainder, \remainder, \neg, lsl #1
-.endif
-.else
-    mla     \divisor, \inv, \neg, \numerator
-    mov     \quotient, \inv
-    cmn     \divisor, \neg
-    addcc   \quotient, \quotient, #1
-    addpl   \quotient, \quotient, #2
-.endif
-.if \return
-    bx      lr
-.else
-    b       99f
-.endif
-#endif
-99:
-.endm
-
-.macro ARMV5_SDIV32_BODY numerator, divisor, quotient, remainder, bits, inv, neg, sign, div0label, return
-    /* sign[31] = divisor sign */
-    ands    \sign, \divisor, #1<<31
-    rsbne   \divisor, \divisor, #0
-    /* sign[31] = result sign, sign[0:30], C = numerator sign */
-    eors    \sign, \sign, \numerator, asr #32
-    clz     \bits, \divisor
-    rsbcs   \numerator, \numerator, #0
-    /* On ARMv6, subtract divisor before performing division, which ensures
-       numerator sign bit is clear and smmul may be used in place of umull. The
-       fixup for the results can be fit entirely into existing delay slots on
-       the main division paths. It costs 1c in the num<div path if the
-       the remainder is to be produced in the numerator's register, and 1c in
-       the power-of-2-divisor path only if producing both remainder and
-       quotient. */
-#if ARM_ARCH >= 6
-    subs    \numerator, \numerator, \divisor
-#else
-    cmp     \numerator, \divisor
-#endif
-    movcs   \inv, \divisor, lsl \bits
-    bcc     30f
-    /* Test whether divisor is 2^N */
-    cmp     \inv, #1<<31
-    add     \inv, pc, \inv, lsr #25
-    bls     20f
-    /* Load approximate reciprocal */
-    ldrb    \inv, [\inv, #.L_udiv_est_table-.-64]
-    subs    \bits, \bits, #7
-    rsb     \neg, \divisor, #0
-    /* Scale approximate reciprocal, or else branch to large-divisor path */
-    movpl   \divisor, \inv, lsl \bits
-    bmi     10f
-    /* Newton-Raphson iteration to improve reciprocal accuracy */
-    mul     \inv, \divisor, \neg
-    smlawt  \divisor, \divisor, \inv, \divisor
-    mul     \inv, \divisor, \neg
-    /* Complete N-R math and produce approximate quotient. Use smmla/smmul on
-       ARMv6. */
-#if ARM_ARCH >= 6
-    smmla   \divisor, \divisor, \inv, \divisor
-    smmul   \inv, \numerator, \divisor
-#else
-    mov     \bits, #0
-    smlal   \bits, \divisor, \inv, \divisor
-    umull   \bits, \inv, \numerator, \divisor
-#endif
-    /* Calculate remainder and correct quotient. */
-    add     \numerator, \numerator, \neg
-.ifnc "", "\remainder"
-    mla     \remainder, \inv, \neg, \numerator
-.ifnc "", "\quotient"
-#if ARM_ARCH >= 6
-    add     \quotient, \inv, #1
-#else
-    mov     \quotient, \inv
-#endif
-    cmn     \remainder, \neg
-    subcs   \remainder, \remainder, \neg
-    addpl   \remainder, \remainder, \neg, lsl #1
-    addcc   \quotient, \quotient, #1
-    addpl   \quotient, \quotient, #2
-.else
-    cmn     \remainder, \neg
-    subcs   \remainder, \remainder, \neg
-    addpl   \remainder, \remainder, \neg, lsl #1
-.endif
-.else
-    mla     \divisor, \inv, \neg, \numerator
-#if ARM_ARCH >= 6
-    add     \quotient, \inv, #1
-#else
-    mov     \quotient, \inv
-#endif
-    cmn     \divisor, \neg
-    addcc   \quotient, \quotient, #1
-    addpl   \quotient, \quotient, #2
-.endif
-    ARM_SDIV32_POST \quotient, \remainder, \sign
-.ifnc "", "\return"
-    \return
-.else
-    b       99f
-.endif
-10:
-    /* Very large divisors can be handled without further improving the
-       reciprocal. First the reciprocal must be reduced to ensure that it
-       underestimates the correct value. */
-    rsb     \bits, \bits, #0
-    sub     \inv, \inv, #4
-    mov     \divisor, \inv, lsr \bits
-    /* Calculate approximate quotient and remainder */
-#if ARM_ARCH >= 6
-    smmul   \inv, \numerator, \divisor
-#else
-    umull   \bits, \inv, \numerator, \divisor
-#endif
-    /* Correct quotient and remainder */
-.ifnc "", "\remainder"
-    mla     \remainder, \inv, \neg, \numerator
-.ifnc "", "\quotient"
-#if ARM_ARCH >= 6
-    add     \quotient, \inv, #1
-#else
-    mov     \quotient, \inv
-#endif
-    cmn     \neg, \remainder, lsr #1
-    addcs   \remainder, \remainder, \neg, lsl #1
-    addcs   \quotient, \quotient, #2
-    cmn     \neg, \remainder
-    addcs   \remainder, \remainder, \neg
-    addcs   \quotient, \quotient, #1
-.else
-    cmn     \neg, \remainder, lsr #1
-    addcs   \remainder, \remainder, \neg, lsl #1
-    cmn     \neg, \remainder
-    addcs   \remainder, \remainder, \neg
-.endif
-.else
-    mla     \divisor, \inv, \neg, \numerator
-#if ARM_ARCH >= 6
-    add     \quotient, \inv, #1
-#else
-    mov     \quotient, \inv
-#endif
-    cmn     \neg, \divisor, lsr #1
-    addcs   \divisor, \divisor, \neg, lsl #1
-    addcs   \quotient, \quotient, #2
-    cmn     \neg, \divisor
-    addcs   \quotient, \quotient, #1
-.endif
-    ARM_SDIV32_POST \quotient, \remainder, \sign
-.ifnc "", "\return"
-    \return
-.else
-    b       99f
-.endif
-20:
-    /* Handle division by powers of two by shifting right. Mod is handled
-       by using divisor-1 as a bitmask. */
-.ifnc "", "\div0label"
-    bne     \div0label
-.endif
-.ifnc "", "\remainder"
-.ifnc "", "\quotient"
-    rsb     \bits, \bits, #31
-#if ARM_ARCH >= 6
-    add     \numerator, \numerator, \divisor
-#endif
-    sub     \divisor, \divisor, #1
-    and     \remainder, \numerator, \divisor
-    mov     \quotient, \numerator, lsr \bits
-.else
-    sub     \divisor, \divisor, #1
-    and     \remainder, \numerator, \divisor
-.endif
-.else
-    rsb     \bits, \bits, #31
-#if ARM_ARCH >= 6
-    add     \numerator, \numerator, \divisor
-#endif
-    mov     \quotient, \numerator, lsr \bits
-.endif
-    ARM_SDIV32_POST \quotient, \remainder, \sign
-.ifnc "", "\return"
-    \return
-.else
-    b       99f
-.endif
-30:
-    /* Handle numerator < divisor - quotient is zero, remainder is numerator,
-       which must be restored to its original value on ARMv6. */
-.ifnc "", "\remainder"
-#if ARM_ARCH >= 6
-    add     \remainder, \numerator, \divisor
-#else
-.ifnc "\remainder", "\numerator"
-    mov     \remainder, \numerator
-.endif
-#endif
-.endif
-.ifnc "", "\quotient"
-    mov     \quotient, #0
-.endif
-.ifnc "", "\remainder"
-    ARM_SDIV32_POST "", \remainder, \sign
-.endif
-.ifnc "", "\return"
-    \return
-.endif
-99:
-.endm
-#endif
-
-    .section .text
-
-__div0_wrap_s:
-    sub sp, sp, #4
-    b       __div0
-    .size __div0_wrap_s, . - __div0_wrap_s
-
-__div0_wrap:
-    str     lr, [sp, #-4]!
-    b       __div0
-    .size __div0_wrap, . - __div0_wrap
-
-#ifndef __ARM_EABI__
-    .global __divsi3
-    .type   __divsi3,%function
-    .global __udivsi3
-    .type   __udivsi3,%function
-    .global __udivsi3
-    .type   __udivsi3,%function
-#else
-/* The div+mod averagess a fraction of a cycle worse for signed values, and
-   slightly better for unsigned, so just alias div to divmod. */
-    .global __aeabi_uidivmod
-    .type   __aeabi_uidivmod,%function
-    .global __aeabi_uidiv
-    .type   __aeabi_uidiv,%function
-    .set    __aeabi_uidiv,__aeabi_uidivmod
-    .global __aeabi_idivmod
-    .type   __aeabi_idivmod,%function
-    .global __aeabi_idiv
-    .type   __aeabi_idiv,%function
-    .set    __aeabi_idiv,__aeabi_idivmod
-#endif
-
-
-#if ARM_ARCH < 5
-    .global __clzsi2
-    .type   __clzsi2, %function
-
-__clzsi2:
-    orr r0, r0, r0, lsr #8
-    orr r0, r0, r0, lsr #4
-    orr r0, r0, r0, lsr #2
-    orr r0, r0, r0, lsr #1
-    bic r0, r0, r0, lsr #16
-    rsb r0, r0, r0, lsl #14
-    rsb r0, r0, r0, lsl #11
-    rsb r0, r0, r0, lsl #9
-    ldrb r0, [pc, r0, lsr #26]
-    bx lr
-    .byte 32, 20, 19,  0,  0, 18,  0,  7, 10, 17,  0,  0, 14,  0,  6,  0
-    .byte  0,  9,  0, 16,  0,  0,  1, 26,  0, 13,  0,  0, 24,  5,  0,  0
-    .byte  0, 21,  0,  8, 11,  0, 15,  0,  0,  0,  0,  2, 27,  0, 25,  0
-    .byte 22,  0, 12,  0,  0,  3, 28,  0, 23,  0,  4, 29,  0,  0, 30, 31
-    .size __clzsi2, .-__clzsi2
-
-#ifndef __ARM_EABI__
-__udivsi3:
-    ARMV4_UDIV32_BODY r0, r1, r0, "", r2, r3, __div0_wrap, 1
-    .size __udivsi3, . - __udivsi3
-
-__divsi3:
-    ARMV4_SDIV32_BODY r0, r1, r0, "", r2, r3, __div0_wrap, 1
-    .size __divsi3, . - __divsi3
-
-#else
-__aeabi_uidivmod:
-    ARMV4_UDIV32_BODY r0, r1, r0, r1, r2, r3, __div0_wrap, 1
-    .size __aeabi_uidivmod, . - __aeabi_uidivmod
-
-__aeabi_idivmod:
-    ARMV4_SDIV32_BODY r0, r1, r0, r1, r2, r3, __div0_wrap, 1
-    .size __aeabi_idivmod, . - __aeabi_idivmod
-#endif
-
-#else
-#ifndef __ARM_EABI__
-__udivsi3:
-    ARMV5_UDIV32_BODY r0, r1, r0, "", r2, r3, ip, __div0_wrap, 1
-    .size __udivsi3, . - __udivsi3
-
-__divsi3:
-    str lr, [sp, #-4]
-    ARMV5_SDIV32_BODY r0, r1, r0, "", r2, lr, ip, r3, __div0_wrap_s, "ldr pc, [sp, #-4]"
-    .size __divsi3, . - __divsi3
-
-#else
-__aeabi_uidivmod:
-    ARMV5_UDIV32_BODY r0, r1, r0, r1, r2, r3, ip, __div0_wrap, 1
-    .size __aeabi_uidivmod, . - __aeabi_uidivmod
-
-__aeabi_idivmod:
-    str lr, [sp, #-4]
-    ARMV5_SDIV32_BODY r0, r1, r0, r1, r2, lr, ip, r3, __div0_wrap_s, "ldr pc, [sp, #-4]"
-    .size __aeabi_idivmod, . - __aeabi_idivmod
-#endif
-
-.L_udiv_est_table:
-    .byte 0xff, 0xfc, 0xf8, 0xf4, 0xf0, 0xed, 0xea, 0xe6
-    .byte 0xe3, 0xe0, 0xdd, 0xda, 0xd7, 0xd4, 0xd2, 0xcf
-    .byte 0xcc, 0xca, 0xc7, 0xc5, 0xc3, 0xc0, 0xbe, 0xbc
-    .byte 0xba, 0xb8, 0xb6, 0xb4, 0xb2, 0xb0, 0xae, 0xac
-    .byte 0xaa, 0xa8, 0xa7, 0xa5, 0xa3, 0xa2, 0xa0, 0x9f
-    .byte 0x9d, 0x9c, 0x9a, 0x99, 0x97, 0x96, 0x94, 0x93
-    .byte 0x92, 0x90, 0x8f, 0x8e, 0x8d, 0x8c, 0x8a, 0x89
-    .byte 0x88, 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81
-#endif
diff --git a/lib/arm_support/arm_support.make b/lib/arm_support/arm_support.make
new file mode 100644
index 0000000000..0f6f7683b4
--- /dev/null
+++ b/lib/arm_support/arm_support.make
@@ -0,0 +1,17 @@
+#             __________               __   ___.
+#   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+#   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+#   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+#   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+#                     \/            \/     \/    \/            \/
+#
+
+ARMSUPPORT_DIR = $(ROOTDIR)/lib/arm_support
+ARMSUPPORT_SRC = $(ARMSUPPORT_DIR)/support-arm.S
+ARMSUPPORT_OBJ := $(call c2obj, $(ARMSUPPORT_SRC))
+
+OTHER_SRC += $(ARMSUPPORT_SRC)
+
+$(LIBARMSUPPORT): $(ARMSUPPORT_OBJ)
+	$(SILENT)$(shell rm -f $@)
+	$(call PRINTS,AR $(@F))$(AR) rcs $@ $^ >/dev/null
diff --git a/lib/arm_support/support-arm.S b/lib/arm_support/support-arm.S
new file mode 100644
index 0000000000..6141a3f158
--- /dev/null
+++ b/lib/arm_support/support-arm.S
@@ -0,0 +1,703 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2008 by Jens Arnold
+ * Copyright (C) 2009 by Andrew Mahone
+ *
+ * Optimised replacements for libgcc functions
+ *
+ * Based on: libgcc routines for ARM cpu, additional algorithms from ARM System
+ *           Developer's Guide
+ * Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
+ * Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
+ * Free Software Foundation, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include <config.h>
+
+.macro ARM_SDIV32_PRE numerator, divisor, sign
+    /* Signed-division prologue: record the signs. sign[31] = divisor sign */
+    ands    \sign, \divisor, #1<<31
+    rsbeq   \divisor, \divisor, #0          /* Z set (divisor not negative): negate it, leaving -|divisor| */
+    /* sign[31] = result sign, sign[0:30], C = numerator sign */
+    eors    \sign, \sign, \numerator, asr #32
+    rsbcs   \numerator, \numerator, #0      /* C set (numerator negative): replace it by its absolute value */
+.endm
+
+.macro ARM_SDIV32_POST quotient, remainder, sign
+    movs    \sign, \sign, lsl #1            /* Signed-division epilogue: C = old sign[31] (result sign), N = old sign[30] (numerator sign) */
+.ifnc "", "\quotient"
+    rsbcs   \quotient, \quotient, #0        /* negate the quotient when the result sign is set */
+.endif
+.ifnc "", "\remainder"
+    rsbmi   \remainder, \remainder, #0      /* negate the remainder when the numerator sign is set */
+.endif
+.endm
+
+#if ARM_ARCH < 5
+.macro ARMV4_UDIV32_BODY numerator, divisor, quotient, remainder, tmp, bits, div0label, return
+.ifnc "", "\div0label"
+    rsbs    \divisor, \divisor, #0          /* Unsigned divide, ARMv4 flavour. The body works on the negated divisor; Z set means it was zero */
+    beq     \div0label
+.else
+    rsb     \divisor, \divisor, #0          /* same negation, without the zero-divisor check */
+.endif
+    /* This SWAR divider requires a numerator less than 1<<31, because it must
+       be able to shift the remainder left at each step without shifting out
+       topmost bit. Since a shift might be needed for the aligned remainder to
+       exceed the divisor, the topmost bit must be unset at the start to avoid
+       this overflow case. The original numerator is saved so that the result
+       can be corrected after the reduced division completes. */
+    cmn     \numerator, \divisor            /* C clear when numerator < original divisor */
+.ifc "", "\quotient"
+.ifc "\numerator", "\remainder"
+.if \return
+    bxcc    lr                              /* remainder only, in place: the answer is already in the register */
+.else
+    bcc     99f                             /* conditional like the bxcc above, so larger numerators still get divided */
+.endif
+.else
+    bcc     20f
+.endif
+.else
+    bcc     20f
+.endif
+    movs    \tmp, \numerator                /* keep the original numerator; N set when its bit 31 is set */
+    movmi   \numerator, \numerator, lsr #1  /* halve such a numerator, see the comment above */
+    mov     \bits, #30                      /* reduced below while the divisor is aligned in steps of 16, 8, 4, 2, 1 */
+.set shift, 16
+.rept 5
+    cmn     \divisor, \numerator, lsr #shift
+    subcs   \bits, \bits, #shift
+    movcs   \divisor, \divisor, lsl #shift
+.set shift, shift >> 1
+.endr
+    adds    \numerator, \numerator, \divisor
+    subcc   \numerator, \numerator, \divisor
+    add     pc, pc, \bits, lsl #3           /* computed jump over `bits` of the unrolled steps below (two instructions, 8 bytes each) */
+    nop                                     /* filler: pc reads as the add + 8, i.e. the first unrolled step */
+.rept 30
+    adcs    \numerator, \divisor, \numerator, lsl #1
+    subcc   \numerator, \numerator, \divisor
+.endr
+    adc     \numerator, \numerator, \numerator
+    movs    \tmp, \tmp, asr #1              /* N set again when the original numerator had bit 31 set */
+    rsb     \bits, \bits, #31
+    bmi     10f                             /* such numerators need the correction at 10: */
+.ifc "", "\quotient"
+    mov     \remainder, \numerator, lsr \bits   /* the combined word holds the remainder above bit `bits`, the quotient below */
+.else
+.ifc "", "\remainder"
+    mov     \divisor, \numerator, lsr \bits
+    eor     \quotient, \numerator, \divisor, lsl \bits
+.else
+    mov     \remainder, \numerator, lsr \bits
+    eor     \quotient, \numerator, \remainder, lsl \bits
+.endif
+.endif
+.ifne \return
+    bx      lr
+.else
+    b       99f
+.endif
+10:
+    mov     \tmp, \numerator, lsr \bits
+    eor     \numerator, \numerator, \tmp, lsl \bits
+    sub     \bits, \bits, #1
+    adc     \tmp, \tmp, \tmp
+    adds    \tmp, \tmp, \divisor, asr \bits
+.ifnc "", "\quotient"
+    adc     \quotient, \numerator, \numerator
+.endif
+.ifnc "", "\remainder"
+    subcc   \remainder, \tmp, \divisor, asr \bits
+    movcs   \remainder, \tmp
+.endif
+.ifne \return
+    bx      lr
+.else
+    b       99f
+.endif
+20:
+.ifnc "", "\remainder"
+.ifnc "\remainder", "\numerator"
+    mov     \remainder, \numerator          /* numerator < divisor: the remainder is the numerator itself */
+.endif
+.endif
+.ifnc "", "\quotient"
+    mov   \quotient, #0
+.endif
+.ifne \return
+    bx      lr
+.else
+99:
+.endif
+.endm
+
+.macro ARMV4_SDIV32_BODY numerator, divisor, quotient, remainder, bits, sign, div0label, return
+    /* Signed wrapper around the ARMv4 SWAR divider: ARM_SDIV32_PRE records the
+       signs and inverts the divisor; the zero divisor test follows it. */
+    ARM_SDIV32_PRE \numerator, \divisor, \sign
+.ifnc "",   "\div0label"
+    tst     \divisor, \divisor              /* Z set: the divisor is zero */
+    beq     \div0label
+.endif
+    /* This SWAR divider requires a numerator less than 1<<31, because it must
+       be able to shift the remainder left at each step without shifting out
+       topmost bit. With signed inputs, whose absolute value may not exceed
+       1<<31, this may be accomplished simply by subtracting the divisor before
+       beginning division, and adding 1 to the quotient. */
+    adds    \numerator, \numerator, \divisor    /* the divisor register is negated, so this subtracts; C clear: |numerator| < |divisor| */
+    bcc     20f
+    mov     \bits, #30                      /* reduced below while the divisor is aligned in steps of 16, 8, 4, 2, 1 */
+.set shift, 16
+.rept 5
+    cmn     \divisor, \numerator, lsr #shift
+    subcs   \bits, \bits, #shift
+    movcs   \divisor, \divisor, lsl #shift
+.set shift, shift >> 1
+.endr
+    adds    \numerator, \numerator, \divisor
+    subcc   \numerator, \numerator, \divisor
+    add     pc, pc, \bits, lsl #3           /* computed jump over `bits` of the unrolled steps below (two instructions, 8 bytes each) */
+    nop                                     /* filler: pc reads as the add + 8, i.e. the first unrolled step */
+.rept 30
+    adcs    \numerator, \divisor, \numerator, lsl #1
+    subcc   \numerator, \numerator, \divisor
+.endr
+    rsb     \bits, \bits, #31
+    adc     \numerator, \numerator, \numerator
+.ifc "", "\quotient"
+    mov     \remainder, \numerator, lsr \bits   /* the combined word holds the remainder above bit `bits`, the quotient below */
+.else
+.ifc "", "\remainder"
+    mov     \divisor, \numerator, lsr \bits
+    add     \numerator, \numerator, #1      /* the +1 announced above for the divisor subtracted up front */
+    sub     \quotient, \numerator, \divisor, lsl \bits
+.else
+    mov     \remainder, \numerator, lsr \bits
+    add     \numerator, \numerator, #1      /* the +1 announced above for the divisor subtracted up front */
+    sub     \quotient, \numerator, \remainder, lsl \bits
+.endif
+.endif
+.ifne \return
+    ARM_SDIV32_POST \quotient, \remainder, \sign
+    bx      lr
+.else
+    b       99f
+.endif
+20:
+.ifnc "", "\remainder"
+    sub     \remainder, \numerator, \divisor    /* undo the up-front subtraction: the remainder is |numerator| */
+.endif
+.ifnc "", "\quotient"
+    mov     \quotient, #0
+.endif
+.ifne \return
+    ARM_SDIV32_POST "", \remainder, \sign
+    bx      lr
+.else
+99:
+    ARM_SDIV32_POST \quotient, \remainder, \sign
+.endif
+.endm
+
+#else
+.macro ARMV5_UDIV32_BODY numerator, divisor, quotient, remainder, bits, inv, neg, div0label, return
+    cmp     \numerator, \divisor            /* Unsigned divide for ARMv5 and later; numerator < divisor is finished at 30: */
+    clz     \bits, \divisor
+    bcc     30f
+    mov     \inv, \divisor, lsl \bits       /* normalise the divisor: its top set bit moves to bit 31 */
+    add     \neg, pc, \inv, lsr #25         /* pc + top 7 bits of the normalised divisor, base for the table load below */
+    /* Test whether divisor is 2^N */
+    cmp     \inv, #1<<31
+    /* Load approximate reciprocal */
+    ldrhib  \inv, [\neg, #.L_udiv_est_table-.-64]
+    bls     20f
+    subs    \bits, \bits, #7
+    rsb     \neg, \divisor, #0              /* neg = -divisor, used by the mul/mla steps below */
+    /* Scale approximate reciprocal, or else branch to large-divisor path */
+    movpl   \divisor, \inv, lsl \bits
+    bmi     10f
+    /* Newton-Raphson iteration to improve reciprocal accuracy */
+    mul     \inv, \divisor, \neg
+    smlawt  \divisor, \divisor, \inv, \divisor
+    mul     \inv, \divisor, \neg
+    /* Complete N-R math and produce approximate quotient. Use smmla/smmul on
+       ARMv6. */
+#if ARM_ARCH >= 6
+    tst     \numerator, \numerator
+    smmla   \divisor, \divisor, \inv, \divisor
+    /* Branch to large-numerator handler, or else use smmul if sign bit is not
+       set. This wins on average with random numerators, and should be no
+       slower than using umull for small numerator, even if prediction fails.
+    */
+    bmi     40f
+    smmul   \inv, \numerator, \divisor
+#else
+    /* ARMv5e lacks smmul, so always uses umull. */
+    mov     \bits, #0
+    smlal   \bits, \divisor, \inv, \divisor
+    umull   \bits, \inv, \numerator, \divisor
+#endif
+    /* Calculate remainder and correct result. */
+    add     \numerator, \numerator, \neg
+.ifnc "", "\remainder"
+    mla     \remainder, \inv, \neg, \numerator
+.ifnc "", "\quotient"
+    mov     \quotient, \inv
+    cmn     \remainder, \neg
+    subcs   \remainder, \remainder, \neg
+    addpl   \remainder, \remainder, \neg, lsl #1
+    addcc   \quotient, \quotient, #1
+    addpl   \quotient, \quotient, #2
+.else
+    cmn     \remainder, \neg
+    subcs   \remainder, \remainder, \neg
+    addpl   \remainder, \remainder, \neg, lsl #1
+.endif
+.else
+    mla     \divisor, \inv, \neg, \numerator
+    mov     \quotient, \inv
+    cmn     \divisor, \neg
+    addcc   \quotient, \quotient, #1
+    addpl   \quotient, \quotient, #2
+.endif
+.if \return
+    bx      lr
+.else
+    b       99f
+.endif
+10:
+    /* Very large divisors can be handled without further improving the
+       reciprocal. First the reciprocal must be reduced to ensure that it
+       underestimates the correct value. */
+    rsb     \bits, \bits, #0
+    sub     \inv, \inv, #4
+    mov     \divisor, \inv, lsr \bits
+    /* Calculate approximate quotient and remainder */
+    umull   \bits, \inv, \numerator, \divisor
+    /* Correct quotient and remainder */
+.ifnc "", "\remainder"
+    mla     \remainder, \inv, \neg, \numerator
+.ifnc "", "\quotient"
+    mov     \quotient, \inv
+    cmn     \neg, \remainder, lsr #1
+    addcs   \remainder, \remainder, \neg, lsl #1
+    addcs   \quotient, \quotient, #2
+    cmn     \neg, \remainder
+    addcs   \remainder, \remainder, \neg
+    addcs   \quotient, \quotient, #1
+.else
+    cmn     \neg, \remainder, lsr #1
+    addcs   \remainder, \remainder, \neg, lsl #1
+    cmn     \neg, \remainder
+    addcs   \remainder, \remainder, \neg
+.endif
+.else
+    mla     \divisor, \inv, \neg, \numerator
+    mov     \quotient, \inv
+    cmn     \neg, \divisor, lsr #1
+    addcs   \divisor, \divisor, \neg, lsl #1
+    addcs   \quotient, \quotient, #2
+    cmn     \neg, \divisor
+    addcs   \quotient, \quotient, #1
+.endif
+.if \return
+    bx      lr
+.else
+    b       99f
+.endif
+20:
+    /* Handle division by powers of two by shifting right. Mod is handled
+       by using divisor-1 as a bitmask. */
+.ifnc "", "\remainder"
+.ifnc "", "\div0label"
+    bne     \div0label                      /* flags are still those of the cmp against 1<<31: NE means the divisor was zero */
+.endif
+.ifnc "", "\quotient"
+    sub     \divisor, \divisor, #1
+    rsb     \bits, \bits, #31
+    and     \remainder, \numerator, \divisor
+    mov     \quotient, \numerator, lsr \bits
+.else
+    sub     \divisor, \divisor, #1
+    and     \remainder, \numerator, \divisor
+.endif
+.else
+    rsb     \bits, \bits, #31
+.ifnc "", "\div0label"
+    bne     \div0label                      /* flags are still those of the cmp against 1<<31: NE means the divisor was zero */
+.endif
+    mov     \quotient, \numerator, lsr \bits
+.endif
+.if \return
+    bx      lr
+.else
+    b       99f
+.endif
+30:
+    /* numerator < divisor: quotient is zero, remainder is the untouched numerator. */
+.ifnc "", "\remainder"
+    mov     \remainder, \numerator
+.endif
+.ifnc "", "\quotient"
+    mov     \quotient, #0
+.endif
+.if \return
+    bx      lr
+.else
+    b       99f                             /* must not fall through into the 40: block on ARMv6 */
+.endif
+#if ARM_ARCH >= 6
+40:
+    /* Handle large (sign bit set) numerators. Works exactly as the ARMv5e code
+       above 10:. */
+    umull   \bits, \inv, \numerator, \divisor
+    add     \numerator, \numerator, \neg
+.ifnc "", "\remainder"
+    mla     \remainder, \inv, \neg, \numerator
+.ifnc "", "\quotient"
+    mov     \quotient, \inv
+    cmn     \remainder, \neg
+    subcs   \remainder, \remainder, \neg
+    addpl   \remainder, \remainder, \neg, lsl #1
+    addcc   \quotient, \quotient, #1
+    addpl   \quotient, \quotient, #2
+.else
+    cmn     \remainder, \neg
+    subcs   \remainder, \remainder, \neg
+    addpl   \remainder, \remainder, \neg, lsl #1
+.endif
+.else
+    mla     \divisor, \inv, \neg, \numerator
+    mov     \quotient, \inv
+    cmn     \divisor, \neg
+    addcc   \quotient, \quotient, #1
+    addpl   \quotient, \quotient, #2
+.endif
+.if \return
+    bx      lr
+.else
+    b       99f
+.endif
+#endif
+99:
+.endm
+
+.macro ARMV5_SDIV32_BODY numerator, divisor, quotient, remainder, bits, inv, neg, sign, div0label, return
+    /* Signed divide for ARMv5 and later. sign[31] = divisor sign */
+    ands    \sign, \divisor, #1<<31
+    rsbne   \divisor, \divisor, #0          /* negative divisor: replace it by its absolute value */
+    /* sign[31] = result sign, sign[0:30], C = numerator sign */
+    eors    \sign, \sign, \numerator, asr #32
+    clz     \bits, \divisor
+    rsbcs   \numerator, \numerator, #0      /* negative numerator: replace it by its absolute value */
+    /* On ARMv6, subtract divisor before performing division, which ensures
+       numerator sign bit is clear and smmul may be used in place of umull. The
+       fixup for the results can be fit entirely into existing delay slots on
+       the main division paths. It costs 1c in the num<div path if the
+       remainder is to be produced in the numerator's register, and 1c in
+       the power-of-2-divisor path only if producing both remainder and
+       quotient. */
+#if ARM_ARCH >= 6
+    subs    \numerator, \numerator, \divisor
+#else
+    cmp     \numerator, \divisor
+#endif
+    movcs   \inv, \divisor, lsl \bits       /* normalise the divisor: its top set bit moves to bit 31 */
+    bcc     30f
+    /* Test whether divisor is 2^N */
+    cmp     \inv, #1<<31
+    add     \inv, pc, \inv, lsr #25         /* pc + top 7 bits of the normalised divisor, base for the table load below */
+    bls     20f
+    /* Load approximate reciprocal */
+    ldrb    \inv, [\inv, #.L_udiv_est_table-.-64]
+    subs    \bits, \bits, #7
+    rsb     \neg, \divisor, #0              /* neg = -divisor, used by the mul/mla steps below */
+    /* Scale approximate reciprocal, or else branch to large-divisor path */
+    movpl   \divisor, \inv, lsl \bits
+    bmi     10f
+    /* Newton-Raphson iteration to improve reciprocal accuracy */
+    mul     \inv, \divisor, \neg
+    smlawt  \divisor, \divisor, \inv, \divisor
+    mul     \inv, \divisor, \neg
+    /* Complete N-R math and produce approximate quotient. Use smmla/smmul on
+       ARMv6. */
+#if ARM_ARCH >= 6
+    smmla   \divisor, \divisor, \inv, \divisor
+    smmul   \inv, \numerator, \divisor
+#else
+    mov     \bits, #0
+    smlal   \bits, \divisor, \inv, \divisor
+    umull   \bits, \inv, \numerator, \divisor
+#endif
+    /* Calculate remainder and correct quotient. */
+    add     \numerator, \numerator, \neg
+.ifnc "", "\remainder"
+    mla     \remainder, \inv, \neg, \numerator
+.ifnc "", "\quotient"
+#if ARM_ARCH >= 6
+    add     \quotient, \inv, #1             /* +1 for the divisor subtracted before the division on ARMv6 */
+#else
+    mov     \quotient, \inv
+#endif
+    cmn     \remainder, \neg
+    subcs   \remainder, \remainder, \neg
+    addpl   \remainder, \remainder, \neg, lsl #1
+    addcc   \quotient, \quotient, #1
+    addpl   \quotient, \quotient, #2
+.else
+    cmn     \remainder, \neg
+    subcs   \remainder, \remainder, \neg
+    addpl   \remainder, \remainder, \neg, lsl #1
+.endif
+.else
+    mla     \divisor, \inv, \neg, \numerator
+#if ARM_ARCH >= 6
+    add     \quotient, \inv, #1             /* +1 for the divisor subtracted before the division on ARMv6 */
+#else
+    mov     \quotient, \inv
+#endif
+    cmn     \divisor, \neg
+    addcc   \quotient, \quotient, #1
+    addpl   \quotient, \quotient, #2
+.endif
+    ARM_SDIV32_POST \quotient, \remainder, \sign
+.ifnc "", "\return"
+    \return                                 /* caller-supplied return instruction */
+.else
+    b       99f
+.endif
+10:
+    /* Very large divisors can be handled without further improving the
+       reciprocal. First the reciprocal must be reduced to ensure that it
+       underestimates the correct value. */
+    rsb     \bits, \bits, #0
+    sub     \inv, \inv, #4
+    mov     \divisor, \inv, lsr \bits
+    /* Calculate approximate quotient and remainder */
+#if ARM_ARCH >= 6
+    smmul   \inv, \numerator, \divisor
+#else
+    umull   \bits, \inv, \numerator, \divisor
+#endif
+    /* Correct quotient and remainder */
+.ifnc "", "\remainder"
+    mla     \remainder, \inv, \neg, \numerator
+.ifnc "", "\quotient"
+#if ARM_ARCH >= 6
+    add     \quotient, \inv, #1             /* +1 for the divisor subtracted before the division on ARMv6 */
+#else
+    mov     \quotient, \inv
+#endif
+    cmn     \neg, \remainder, lsr #1
+    addcs   \remainder, \remainder, \neg, lsl #1
+    addcs   \quotient, \quotient, #2
+    cmn     \neg, \remainder
+    addcs   \remainder, \remainder, \neg
+    addcs   \quotient, \quotient, #1
+.else
+    cmn     \neg, \remainder, lsr #1
+    addcs   \remainder, \remainder, \neg, lsl #1
+    cmn     \neg, \remainder
+    addcs   \remainder, \remainder, \neg
+.endif
+.else
+    mla     \divisor, \inv, \neg, \numerator
+#if ARM_ARCH >= 6
+    add     \quotient, \inv, #1             /* +1 for the divisor subtracted before the division on ARMv6 */
+#else
+    mov     \quotient, \inv
+#endif
+    cmn     \neg, \divisor, lsr #1
+    addcs   \divisor, \divisor, \neg, lsl #1
+    addcs   \quotient, \quotient, #2
+    cmn     \neg, \divisor
+    addcs   \quotient, \quotient, #1
+.endif
+    ARM_SDIV32_POST \quotient, \remainder, \sign
+.ifnc "", "\return"
+    \return
+.else
+    b       99f
+.endif
+20:
+    /* Handle division by powers of two by shifting right. Mod is handled
+       by using divisor-1 as a bitmask. */
+.ifnc "", "\div0label"
+    bne     \div0label                      /* flags are still those of the cmp against 1<<31: NE means the divisor was zero */
+.endif
+.ifnc "", "\remainder"
+.ifnc "", "\quotient"
+    rsb     \bits, \bits, #31
+#if ARM_ARCH >= 6
+    add     \numerator, \numerator, \divisor    /* undo the subtraction done before the division on ARMv6 */
+#endif
+    sub     \divisor, \divisor, #1
+    and     \remainder, \numerator, \divisor
+    mov     \quotient, \numerator, lsr \bits
+.else
+    sub     \divisor, \divisor, #1
+    and     \remainder, \numerator, \divisor
+.endif
+.else
+    rsb     \bits, \bits, #31
+#if ARM_ARCH >= 6
+    add     \numerator, \numerator, \divisor    /* undo the subtraction done before the division on ARMv6 */
+#endif
+    mov     \quotient, \numerator, lsr \bits
+.endif
+    ARM_SDIV32_POST \quotient, \remainder, \sign
+.ifnc "", "\return"
+    \return
+.else
+    b       99f
+.endif
+30:
+    /* Handle numerator < divisor - quotient is zero, remainder is numerator,
+       which must be restored to its original value on ARMv6. */
+.ifnc "", "\remainder"
+#if ARM_ARCH >= 6
+    add     \remainder, \numerator, \divisor
+#else
+.ifnc "\remainder", "\numerator"
+    mov     \remainder, \numerator
+.endif
+#endif
+.endif
+.ifnc "", "\quotient"
+    mov     \quotient, #0
+.endif
+.ifnc "", "\remainder"
+    ARM_SDIV32_POST "", \remainder, \sign
+.endif
+.ifnc "", "\return"
+    \return
+.endif
+99:
+.endm
+#endif
+
+    .section .text
+
+__div0_wrap_s:
+    sub sp, sp, #4                          /* the signed ARMv5 entry points already stored lr at [sp, #-4]; move sp down over it */
+    b       __div0
+    .size __div0_wrap_s, . - __div0_wrap_s
+
+__div0_wrap:
+    str     lr, [sp, #-4]!                  /* push lr, then continue in __div0 (not defined in this file) */
+    b       __div0
+    .size __div0_wrap, . - __div0_wrap
+
+#ifndef __ARM_EABI__
+/* Without EABI the routines are exported as __divsi3 and __udivsi3; each
+   symbol needs to be declared only once. */
+    .global __divsi3
+    .type   __divsi3,%function
+    .global __udivsi3
+    .type   __udivsi3,%function
+#else
+/* The div+mod averages a fraction of a cycle worse for signed values, and
+   slightly better for unsigned, so just alias div to divmod. */
+    .global __aeabi_uidivmod
+    .type   __aeabi_uidivmod,%function
+    .global __aeabi_uidiv
+    .type   __aeabi_uidiv,%function
+    .set    __aeabi_uidiv,__aeabi_uidivmod
+    .global __aeabi_idivmod
+    .type   __aeabi_idivmod,%function
+    .global __aeabi_idiv
+    .type   __aeabi_idiv,%function
+    .set    __aeabi_idiv,__aeabi_idivmod
+#endif
+
+
+#if ARM_ARCH < 5
+    .global __clzsi2
+    .type   __clzsi2, %function
+
+__clzsi2:                                   /* replacement for libgcc __clzsi2, built only when ARM_ARCH < 5; argument and result in r0 */
+    orr r0, r0, r0, lsr #8                  /* the four orr steps copy set bits downwards by 8, 4, 2 and 1 positions */
+    orr r0, r0, r0, lsr #4
+    orr r0, r0, r0, lsr #2
+    orr r0, r0, r0, lsr #1
+    bic r0, r0, r0, lsr #16
+    rsb r0, r0, r0, lsl #14                 /* three shift-and-subtract steps; the ldrb below uses the top 6 bits as index */
+    rsb r0, r0, r0, lsl #11
+    rsb r0, r0, r0, lsl #9
+    ldrb r0, [pc, r0, lsr #26]              /* pc reads as this instruction + 8, i.e. the first .byte after the bx */
+    bx lr
+    .byte 32, 20, 19,  0,  0, 18,  0,  7, 10, 17,  0,  0, 14,  0,  6,  0
+    .byte  0,  9,  0, 16,  0,  0,  1, 26,  0, 13,  0,  0, 24,  5,  0,  0
+    .byte  0, 21,  0,  8, 11,  0, 15,  0,  0,  0,  0,  2, 27,  0, 25,  0
+    .byte 22,  0, 12,  0,  0,  3, 28,  0, 23,  0,  4, 29,  0,  0, 30, 31
+    .size __clzsi2, .-__clzsi2
+
+#ifndef __ARM_EABI__
+__udivsi3:                                  /* unsigned: numerator r0, divisor r1, quotient returned in r0 */
+    ARMV4_UDIV32_BODY r0, r1, r0, "", r2, r3, __div0_wrap, 1
+    .size __udivsi3, . - __udivsi3
+
+__divsi3:                                   /* signed: numerator r0, divisor r1, quotient returned in r0 */
+    ARMV4_SDIV32_BODY r0, r1, r0, "", r2, r3, __div0_wrap, 1
+    .size __divsi3, . - __divsi3
+
+#else
+__aeabi_uidivmod:                           /* unsigned: quotient in r0, remainder in r1; __aeabi_uidiv is an alias */
+    ARMV4_UDIV32_BODY r0, r1, r0, r1, r2, r3, __div0_wrap, 1
+    .size __aeabi_uidivmod, . - __aeabi_uidivmod
+
+__aeabi_idivmod:                            /* signed: quotient in r0, remainder in r1; __aeabi_idiv is an alias */
+    ARMV4_SDIV32_BODY r0, r1, r0, r1, r2, r3, __div0_wrap, 1
+    .size __aeabi_idivmod, . - __aeabi_idivmod
+#endif
+
+#else
+#ifndef __ARM_EABI__
+__udivsi3:                                  /* unsigned: numerator r0, divisor r1, quotient returned in r0 */
+    ARMV5_UDIV32_BODY r0, r1, r0, "", r2, r3, ip, __div0_wrap, 1
+    .size __udivsi3, . - __udivsi3
+
+__divsi3:                                   /* signed: numerator r0, divisor r1, quotient returned in r0 */
+    str lr, [sp, #-4]                       /* lr is used as a scratch register below; the return instruction reloads it into pc */
+    ARMV5_SDIV32_BODY r0, r1, r0, "", r2, lr, ip, r3, __div0_wrap_s, "ldr pc, [sp, #-4]"
+    .size __divsi3, . - __divsi3
+
+#else
+__aeabi_uidivmod:                           /* unsigned: quotient in r0, remainder in r1; __aeabi_uidiv is an alias */
+    ARMV5_UDIV32_BODY r0, r1, r0, r1, r2, r3, ip, __div0_wrap, 1
+    .size __aeabi_uidivmod, . - __aeabi_uidivmod
+
+__aeabi_idivmod:                            /* signed: quotient in r0, remainder in r1; __aeabi_idiv is an alias */
+    str lr, [sp, #-4]                       /* lr is used as a scratch register below; the return instruction reloads it into pc */
+    ARMV5_SDIV32_BODY r0, r1, r0, r1, r2, lr, ip, r3, __div0_wrap_s, "ldr pc, [sp, #-4]"
+    .size __aeabi_idivmod, . - __aeabi_idivmod
+#endif
+
+.L_udiv_est_table:                          /* 64 one-byte reciprocal estimates; the ARMv5 bodies index it with (normalised divisor >> 25) - 64 */
+    .byte 0xff, 0xfc, 0xf8, 0xf4, 0xf0, 0xed, 0xea, 0xe6
+    .byte 0xe3, 0xe0, 0xdd, 0xda, 0xd7, 0xd4, 0xd2, 0xcf
+    .byte 0xcc, 0xca, 0xc7, 0xc5, 0xc3, 0xc0, 0xbe, 0xbc
+    .byte 0xba, 0xb8, 0xb6, 0xb4, 0xb2, 0xb0, 0xae, 0xac
+    .byte 0xaa, 0xa8, 0xa7, 0xa5, 0xa3, 0xa2, 0xa0, 0x9f
+    .byte 0x9d, 0x9c, 0x9a, 0x99, 0x97, 0x96, 0x94, 0x93
+    .byte 0x92, 0x90, 0x8f, 0x8e, 0x8d, 0x8c, 0x8a, 0x89
+    .byte 0x88, 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81
+#endif
diff --git a/tools/root.make b/tools/root.make
index dd827d6d3f..3cdee2caf7 100644
--- a/tools/root.make
+++ b/tools/root.make
@@ -70,9 +70,12 @@ ifeq (,$(findstring checkwps,$(APPSDIR)))
   endif
 endif
 
-#included before codecs.make and plugins.make so they see $(LIBSETJMP)
+# included before codecs.make and plugins.make so they see $(LIBSETJMP) and $(LIBARMSUPPORT)
 ifndef APP_TYPE
   include $(ROOTDIR)/lib/libsetjmp/libsetjmp.make
+  ifeq (arm,$(ARCH))
+    include $(ROOTDIR)/lib/arm_support/arm_support.make
+  endif
 endif
 
 ifneq (,$(findstring bootloader,$(APPSDIR)))
@@ -176,6 +179,13 @@ LINKRAM := $(BUILDDIR)/ram.link
 ROMLDS := $(FIRMDIR)/rom.lds
 LINKROM := $(BUILDDIR)/rom.link
 
+# Link flag for the ARM support library; stays empty on other architectures.
+ifeq (arm,$(ARCH))
+  LIBARMSUPPORT_LINK := -larm_support
+else
+  LIBARMSUPPORT_LINK :=
+endif
+
 
 $(LINKRAM): $(RAMLDS) $(CONFIGFILE)
 	$(call PRINTS,PP $(@F))
@@ -185,19 +195,21 @@ $(LINKROM): $(ROMLDS)
 	$(call PRINTS,PP $(@F))
 	$(call preprocess2file,$<,$@,-DLOADADDRESS=$(LOADADDRESS))
 
-$(BUILDDIR)/rockbox.elf : $$(OBJ) $$(FIRMLIB) $$(VOICESPEEXLIB) $$(SKINLIB) $$(LINKRAM)
+$(BUILDDIR)/rockbox.elf : $$(OBJ) $$(FIRMLIB) $$(VOICESPEEXLIB) $$(SKINLIB) $$(LIBARMSUPPORT) $$(LINKRAM)
 	$(call PRINTS,LD $(@F))$(CC) $(GCCOPTS) -Os -nostdlib -o $@ $(OBJ) \
 		-L$(BUILDDIR)/firmware -lfirmware \
-		-L$(BUILDDIR)/lib -lskin_parser \
+		-L$(BUILDDIR)/lib -lskin_parser $(LIBARMSUPPORT_LINK) \
 		-L$(BUILDDIR)/apps/codecs $(VOICESPEEXLIB:lib%.a=-l%) \
 		-lgcc $(BOOTBOXLDOPTS) $(GLOBAL_LDOPTS) \
 		-T$(LINKRAM) -Wl,-Map,$(BUILDDIR)/rockbox.map
 
-$(BUILDDIR)/rombox.elf : $$(OBJ) $$(FIRMLIB) $$(VOICESPEEXLIB) $$(SKINLIB) $$(LINKROM)
+$(BUILDDIR)/rombox.elf : $$(OBJ) $$(FIRMLIB) $$(VOICESPEEXLIB) $$(SKINLIB) $$(LIBARMSUPPORT) $$(LINKROM)
 	$(call PRINTS,LD $(@F))$(CC) $(GCCOPTS) -Os -nostdlib -o $@ $(OBJ) \
-		$(VOICESPEEXLIB) $(FIRMLIB) -lgcc $(GLOBAL_LDOPTS) \
-		-L$(BUILDDIR)/lib -lskin_parser \
-        -L$(BUILDDIR)/firmware -T$(LINKROM) -Wl,-Map,$(BUILDDIR)/rombox.map
+		-L$(BUILDDIR)/firmware -lfirmware \
+		-L$(BUILDDIR)/lib -lskin_parser $(LIBARMSUPPORT_LINK) \
+		-L$(BUILDDIR)/apps/codecs $(VOICESPEEXLIB:lib%.a=-l%) \
+		-lgcc $(GLOBAL_LDOPTS) \
+        -T$(LINKROM) -Wl,-Map,$(BUILDDIR)/rombox.map
 
 $(BUILDDIR)/rockbox.bin : $(BUILDDIR)/rockbox.elf
 	$(call PRINTS,OC $(@F))$(OC) $(if $(filter yes, $(USE_ELF)), -S -x, -O binary) $< $@
-- 
cgit v1.2.3