1 files changed, 477 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/libwavpack/arm.S b/lib/rbcodec/codecs/libwavpack/arm.S
new file mode 100644
index 0000000000..32de1df7de
--- /dev/null
+++ b/lib/rbcodec/codecs/libwavpack/arm.S
@@ -0,0 +1,477 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2006 by David Bryant
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+/* This is an assembly optimized version of the following WavPack function:
+ *
+ * void decorr_stereo_pass_cont_arm (struct decorr_pass *dpp,
+ *                                   long *buffer, long sample_count);
+ *
+ * It performs a single pass of stereo decorrelation on the provided buffer.
+ * Note that this version of the function requires that the 8 previous stereo
+ * samples are visible and correct. In other words, it ignores the "samples_*"
+ * fields in the decorr_pass structure and gets the history data directly
+ * from the buffer. It does, however, return the appropriate history samples
+ * to the decorr_pass structure before returning.
+ *
+ * This is written to work on an ARM7TDMI processor. This version only uses the
+ * 32-bit multiply-accumulate instruction and so will overflow with 24-bit
+ * WavPack files.
+ */
+#include "config.h"
+        .text
+        .align
+        .global         decorr_stereo_pass_cont_arm
+/*
+ * on entry:
+ *
+ * r0 = struct decorr_pass *dpp
+ * r1 = long *buffer
+ * r2 = long sample_count
+ *
+ * The prologue caches delta and both weights in registers, computes the end
+ * of the region to process (buffer + 8 * sample_count, i.e. two 32-bit words
+ * per stereo sample) and then dispatches on dpp->term to one of the
+ * specialised loops below. Every path finishes at common_exit, which writes
+ * the two weights back to dpp and returns.
+ *
+ * History is read from negative offsets of r1, i.e. from the words stored
+ * just before 'buffer'.
+ *
+ * Dispatch summary:
+ *   term == 17, 18, 2     -> dedicated loops
+ *   any other term >= 0   -> term_default_loop (no range check is made here)
+ *   term == -1, -2, -3    -> dedicated loops (these clip the weights)
+ *   any other term < 0    -> straight to common_exit, nothing is processed
+ */
+decorr_stereo_pass_cont_arm:
+        stmfd   sp!, {r4 - r8, r10, r11, lr}    @ save the registers this routine modifies
+        mov     r5, r0                  @ r5 = dpp
+        mov     r11, #512               @ r11 = 512 for rounding
+        ldrsh   r6, [r0, #2]            @ r6 = dpp->delta
+        ldrsh   r4, [r0, #4]            @ r4 = dpp->weight_A
+        ldrsh   r0, [r0, #6]            @ r0 = dpp->weight_B
+        cmp     r2, #0                  @ exit if no samples to process
+        beq     common_exit             @  (weights get stored back unchanged)
+        add     r7, r1, r2, asl #3      @ r7 = buffer ending position
+        ldrsh   r2, [r5, #0]            @ r2 = dpp->term
+        cmp     r2, #0
+        bmi     minus_term              @ negative terms are dispatched separately
+        ldr     lr, [r1, #-16]          @ load 2 sample history from buffer
+        ldr     r10, [r1, #-12]         @  for terms 2, 17, and 18
+        ldr     r8, [r1, #-8]           @  (lr/r10 = pair two back, r8/r3 = pair
+        ldr     r3, [r1, #-4]           @  one back; lr and r8 hold the first words)
+        cmp     r2, #17
+        beq     term_17_loop
+        cmp     r2, #18
+        beq     term_18_loop
+        cmp     r2, #2
+        beq     term_2_loop
+        b       term_default_loop       @ else handle default (1-8, except 2)
+minus_term:
+        mov     r10, #1024              @ r10 = -1024 for weight clipping
+        rsb     r10, r10, #0            @  (only used for negative terms)
+        cmn     r2, #1                  @ cmn adds its operand: Z set if term == -1
+        beq     term_minus_1
+        cmn     r2, #2                  @ term == -2 ?
+        beq     term_minus_2
+        cmn     r2, #3                  @ term == -3 ?
+        beq     term_minus_3
+        b       common_exit             @ any other negative term: nothing to process
+/*
+ ******************************************************************************
+ * Loop to handle term = 17 condition
+ *
+ * r0 = dpp->weight_B           r8 = previous left sample
+ * r1 = bptr                    r9 =
+ * r2 = current sample          r10 = second previous right sample
+ * r3 = previous right sample   r11 = 512 (for rounding)
+ * r4 = dpp->weight_A           ip = current decorrelation value
+ * r5 = dpp                     sp =
+ * r6 = dpp->delta              lr = second previous left sample
+ * r7 = eptr                    pc =
+ *
+ * The flags set by "rsbs" stay live across the ldr/mla/add that follow (none
+ * of those updates the flags). When the decorrelation value is zero the
+ * store is skipped, because the new sample equals the one already in the
+ * buffer, and the weight is left alone. The weight is also left alone when
+ * the input sample is zero. Weights are not clipped in this loop.
+ *******************************************************************************
+ */
+term_17_loop:
+        rsbs    ip, lr, r8, asl #1      @ decorr value = (2 * prev) - 2nd prev
+        mov     lr, r8                  @ previous becomes 2nd previous
+        ldr     r2, [r1], #4            @ get sample & update pointer
+        mla     r8, ip, r4, r11         @ mult decorr value by weight, round,
+        add     r8, r2, r8, asr #10     @  shift, and add to new sample
+        strne   r8, [r1, #-4]           @ if change possible, store sample back
+        cmpne   r2, #0                  @ Z ends up set if decorr value or sample is 0
+        beq     .L325                   @  -> skip the weight update
+        teq     ip, r2                  @ update weight based on signs
+        submi   r4, r4, r6              @ signs differ: weight_A -= delta
+        addpl   r4, r4, r6              @ signs match:  weight_A += delta
+.L325:  rsbs    ip, r10, r3, asl #1     @ do same thing for right channel
+        mov     r10, r3                 @ previous becomes 2nd previous
+        ldr     r2, [r1], #4            @ get sample & update pointer
+        mla     r3, ip, r0, r11         @ weight_B is used for this channel
+        add     r3, r2, r3, asr #10
+        strne   r3, [r1, #-4]           @ store only if decorr value was non-zero
+        cmpne   r2, #0
+        beq     .L329                   @ skip weight update if either value is 0
+        teq     ip, r2
+        submi   r0, r0, r6              @ signs differ: weight_B -= delta
+        addpl   r0, r0, r6              @ signs match:  weight_B += delta
+.L329:  cmp     r7, r1                  @ loop back if more samples to do
+        bhi     term_17_loop
+        b       store_1718              @ common exit for terms 17 & 18
+/*
+ ******************************************************************************
+ * Loop to handle term = 18 condition
+ *
+ * r0 = dpp->weight_B           r8 = previous left sample
+ * r1 = bptr                    r9 =
+ * r2 = current sample          r10 = second previous right sample
+ * r3 = previous right sample   r11 = 512 (for rounding)
+ * r4 = dpp->weight_A           ip = decorrelation value
+ * r5 = dpp                     sp =
+ * r6 = dpp->delta              lr = second previous left sample
+ * r7 = eptr                    pc =
+ *
+ * Same structure as the term 17 loop; only the predictor differs. The flags
+ * set by "adds" are the ones tested by the following strne/cmpne. Weights
+ * are not clipped in this loop. When the loop ends, execution falls through
+ * into store_1718.
+ *******************************************************************************
+ */
+term_18_loop:
+        sub     ip, r8, lr              @ decorr value =
+        mov     lr, r8                  @  ((3 * prev) - 2nd prev) >> 1
+        adds    ip, r8, ip, asr #1      @  computed as prev + ((prev - 2nd prev) >> 1)
+        ldr     r2, [r1], #4            @ get sample & update pointer
+        mla     r8, ip, r4, r11         @ mult decorr value by weight, round,
+        add     r8, r2, r8, asr #10     @  shift, and add to new sample
+        strne   r8, [r1, #-4]           @ if change possible, store sample back
+        cmpne   r2, #0                  @ Z ends up set if decorr value or sample is 0
+        beq     .L337                   @  -> skip the weight update
+        teq     ip, r2                  @ update weight based on signs
+        submi   r4, r4, r6              @ signs differ: weight_A -= delta
+        addpl   r4, r4, r6              @ signs match:  weight_A += delta
+.L337:  sub     ip, r3, r10             @ do same thing for right channel
+        mov     r10, r3                 @ previous becomes 2nd previous
+        adds    ip, r3, ip, asr #1      @ sets the flags used by strne/cmpne below
+        ldr     r2, [r1], #4            @ get sample & update pointer
+        mla     r3, ip, r0, r11         @ weight_B is used for this channel
+        add     r3, r2, r3, asr #10
+        strne   r3, [r1, #-4]           @ store only if decorr value was non-zero
+        cmpne   r2, #0
+        beq     .L341                   @ skip weight update if either value is 0
+        teq     ip, r2
+        submi   r0, r0, r6              @ signs differ: weight_B -= delta
+        addpl   r0, r0, r6              @ signs match:  weight_B += delta
+.L341:  cmp     r7, r1                  @ loop back if more samples to do
+        bhi     term_18_loop
+/* common exit for terms 17 & 18
+ *
+ * Writes the two most recent sample pairs held in registers back into the
+ * decorr_pass structure: the left values go to offsets 8 and 12, the right
+ * values to offsets 40 and 44 (presumably samples_A[0..1] and
+ * samples_B[0..1] -- confirm against the struct decorr_pass definition).
+ */
+store_1718:
+        str     r3, [r5, #40]           @ store sample history into struct
+        str     r8, [r5, #8]            @  r3/r8  = previous right/left sample
+        str     r10, [r5, #44]          @  r10/lr = second previous right/left sample
+        str     lr, [r5, #12]
+        b       common_exit             @ and return
+/*
+ ******************************************************************************
+ * Loop to handle term = 2 condition
+ * (note that this case can be handled by the default term handler (1-8), but
+ * this special case is faster because it doesn't have to read memory twice)
+ *
+ * r0 = dpp->weight_B           r8 = previous left sample
+ * r1 = bptr                    r9 =
+ * r2 = current sample          r10 = second previous right sample
+ * r3 = previous right sample   r11 = 512 (for rounding)
+ * r4 = dpp->weight_A           ip = decorrelation value
+ * r5 = dpp                     sp =
+ * r6 = dpp->delta              lr = second previous left sample
+ * r7 = eptr                    pc =
+ *
+ * The decorrelation value is simply the same-channel sample from two pairs
+ * back, which is kept in lr (left) / r10 (right). "movs" both fetches it and
+ * sets the flags tested by strne/cmpne. Weights are not clipped in this loop.
+ * r2 no longer holds dpp->term once the loop has run.
+ *******************************************************************************
+ */
+term_2_loop:
+        movs    ip, lr                  @ get decorrelation value & test
+        mov     lr, r8                  @ previous becomes 2nd previous
+        ldr     r2, [r1], #4            @ get sample & update pointer
+        mla     r8, ip, r4, r11         @ mult decorr value by weight, round,
+        add     r8, r2, r8, asr #10     @  shift, and add to new sample
+        strne   r8, [r1, #-4]           @ if change possible, store sample back
+        cmpne   r2, #0                  @ Z ends up set if decorr value or sample is 0
+        beq     .L225                   @  -> skip the weight update
+        teq     ip, r2                  @ update weight based on signs
+        submi   r4, r4, r6              @ signs differ: weight_A -= delta
+        addpl   r4, r4, r6              @ signs match:  weight_A += delta
+.L225:  movs    ip, r10                 @ do same thing for right channel
+        mov     r10, r3                 @ previous becomes 2nd previous
+        ldr     r2, [r1], #4            @ get sample & update pointer
+        mla     r3, ip, r0, r11         @ weight_B is used for this channel
+        add     r3, r2, r3, asr #10
+        strne   r3, [r1, #-4]           @ store only if decorr value was non-zero
+        cmpne   r2, #0
+        beq     .L229                   @ skip weight update if either value is 0
+        teq     ip, r2
+        submi   r0, r0, r6              @ signs differ: weight_B -= delta
+        addpl   r0, r0, r6              @ signs match:  weight_B += delta
+.L229:  cmp     r7, r1                  @ loop back if more samples to do
+        bhi     term_2_loop
+        b       default_term_exit       @ this exit updates all dpp->samples
+/*
+ ******************************************************************************
+ * Loop to handle default term condition
+ *
+ * r0 = dpp->weight_B           r8 = result accumulator
+ * r1 = bptr                    r9 =
+ * r2 = dpp->term               r10 =
+ * r3 = decorrelation value     r11 = 512 (for rounding)
+ * r4 = dpp->weight_A           ip = current sample
+ * r5 = dpp                     sp =
+ * r6 = dpp->delta              lr =
+ * r7 = eptr                    pc =
+ *
+ * The decorrelation value is read straight from the buffer, term * 8 bytes
+ * (term stereo pairs) before the word being processed, so it is the
+ * same-channel sample from 'term' pairs ago. Unlike the 17/18/2 loops, the
+ * result is always stored back. Weights are not clipped in this loop. When
+ * the loop ends, execution falls through into default_term_exit.
+ *******************************************************************************
+ */
+term_default_loop:
+        ldr     ip, [r1]                @ get original sample
+        ldr     r3, [r1, -r2, asl #3]   @ get decorrelation value based on term
+        mla     r8, r3, r4, r11         @ mult decorr value by weight, round,
+        add     r8, ip, r8, asr #10     @  shift and add to new sample
+        str     r8, [r1], #4            @ store update sample
+        cmp     r3, #0                  @ skip the weight update if the decorr value
+        cmpne   ip, #0                  @  or the original sample is zero
+        beq     .L350
+        teq     ip, r3                  @ update weight based on signs
+        submi   r4, r4, r6              @ signs differ: weight_A -= delta
+        addpl   r4, r4, r6              @ signs match:  weight_A += delta
+.L350:  ldr     ip, [r1]                @ do the same thing for right channel
+        ldr     r3, [r1, -r2, asl #3]   @ same-channel sample from 'term' pairs ago
+        mla     r8, r3, r0, r11         @ weight_B is used for this channel
+        add     r8, ip, r8, asr #10
+        str     r8, [r1], #4            @ store updated sample & advance pointer
+        cmp     r3, #0
+        cmpne   ip, #0
+        beq     .L354                   @ skip weight update if either value is 0
+        teq     ip, r3
+        submi   r0, r0, r6              @ signs differ: weight_B -= delta
+        addpl   r0, r0, r6              @ signs match:  weight_B += delta
+.L354:  cmp     r7, r1                  @ loop back if more samples to do
+        bhi     term_default_loop
+/*
+ * This exit is used by terms 1-8 to store the previous 8 samples into the decorr
+ * structure (even if they are not all used for the given term)
+ *
+ * The loop runs 8 times, walking r1 backwards through the buffer one stereo
+ * pair per pass. The newest pair goes to slot (term - 1) & 7 and each older
+ * pair goes to the slot before it (modulo 8). For every pair the second
+ * word is stored at dpp + 40 + 4 * slot and the first word at
+ * dpp + 8 + 4 * slot (presumably samples_B[] and samples_A[] -- confirm
+ * against the struct decorr_pass definition).
+ *
+ * On entry r1 must point just past the last processed pair and r5 = dpp.
+ * r1, r2, r3, ip and lr are overwritten.
+ */
+default_term_exit:
+        ldrsh   r3, [r5, #0]            @ reload dpp->term (term_2_loop reuses r2)
+        sub     ip, r3, #1              @ ip = slot index for the newest pair
+        mov     lr, #7                  @ lr = passes remaining after the current one
+.L358:  and     r3, ip, #7              @ wrap slot index into 0-7
+        add     r3, r5, r3, asl #2      @ r3 = dpp + 4 * slot
+        ldr     r2, [r1, #-4]           @ second word of the pair just before r1
+        str     r2, [r3, #40]
+        ldr     r2, [r1, #-8]!          @ first word of that pair; r1 steps back by 8
+        str     r2, [r3, #8]
+        sub     ip, ip, #1              @ next older pair goes to the previous slot
+        sub     lr, lr, #1
+        cmn     lr, #1                  @ finished once lr reaches -1 (8 passes)
+        bne     .L358
+        b       common_exit
+/*
+ ******************************************************************************
+ * Loop to handle term = -1 condition
+ *
+ * r0 = dpp->weight_B           r8 =
+ * r1 = bptr                    r9 =
+ * r2 = intermediate result     r10 = -1024 (for clipping)
+ * r3 = previous right sample   r11 = 512 (for rounding)
+ * r4 = dpp->weight_A           ip = current sample
+ * r5 = dpp                     sp =
+ * r6 = dpp->delta              lr = updated left sample
+ * r7 = eptr                    pc =
+ *
+ * The left sample is predicted from the previous right sample, and the right
+ * sample from the left sample that was just updated. Both results are always
+ * stored back. After every weight update the weight is clipped to the range
+ * -1024 ... +1024. r1 is advanced by a whole pair (8 bytes) after the left
+ * store, so the right word is then addressed as [r1, #-4].
+ *******************************************************************************
+ */
+term_minus_1:
+        ldr     r3, [r1, #-4]           @ r3 = right sample of the pair before buffer
+term_minus_1_loop:
+        ldr     ip, [r1]                @ for left channel the decorrelation value
+        mla     r2, r3, r4, r11         @  is the previous right sample (in r3)
+        add     lr, ip, r2, asr #10     @ lr = updated left sample
+        str     lr, [r1], #8            @ store it & advance to the next pair
+        cmp     r3, #0                  @ skip the weight update if the decorr value
+        cmpne   ip, #0                  @  or the original sample is zero
+        beq     .L361
+        teq     ip, r3                  @ update weight based on signs
+        submi   r4, r4, r6              @ signs differ: weight_A -= delta
+        addpl   r4, r4, r6              @ signs match:  weight_A += delta
+        cmp     r4, #1024               @ then clip weight to +/-1024
+        movgt   r4, #1024
+        cmp     r4, r10
+        movlt   r4, r10
+.L361:  ldr     r2, [r1, #-4]           @ for right channel the decorrelation value
+        mla     r3, lr, r0, r11         @  is the just updated left sample (in lr)
+        add     r3, r2, r3, asr #10     @ r3 = updated right sample (kept for next pass)
+        str     r3, [r1, #-4]
+        cmp     lr, #0                  @ skip the weight update if the decorr value
+        cmpne   r2, #0                  @  or the original sample is zero
+        beq     .L369
+        teq     r2, lr
+        submi   r0, r0, r6              @ signs differ: weight_B -= delta
+        addpl   r0, r0, r6              @ signs match:  weight_B += delta
+        cmp     r0, #1024               @ then clip weight to +/-1024
+        movgt   r0, #1024
+        cmp     r0, r10
+        movlt   r0, r10
+.L369:  cmp     r7, r1                  @ loop back if more samples to do
+        bhi     term_minus_1_loop
+        str     r3, [r5, #8]            @ else store right sample and exit
+        b       common_exit
+/*
+ ******************************************************************************
+ * Loop to handle term = -2 condition
+ * (note that the channels are processed in the reverse order here)
+ *
+ * r0 = dpp->weight_B           r8 =
+ * r1 = bptr                    r9 =
+ * r2 = intermediate result     r10 = -1024 (for clipping)
+ * r3 = previous left sample    r11 = 512 (for rounding)
+ * r4 = dpp->weight_A           ip = current sample
+ * r5 = dpp                     sp =
+ * r6 = dpp->delta              lr = updated right sample
+ * r7 = eptr                    pc =
+ *
+ * The right sample is predicted from the previous left sample, and the left
+ * sample from the right sample that was just updated. Both results are always
+ * stored back. After every weight update the weight is clipped to the range
+ * -1024 ... +1024. r1 is advanced by a whole pair (8 bytes) when the left
+ * word is stored.
+ *******************************************************************************
+ */
+term_minus_2:
+        ldr     r3, [r1, #-8]           @ r3 = left sample of the pair before buffer
+term_minus_2_loop:
+        ldr     ip, [r1, #4]            @ for right channel the decorrelation value
+        mla     r2, r3, r0, r11         @  is the previous left sample (in r3)
+        add     lr, ip, r2, asr #10     @ lr = updated right sample
+        str     lr, [r1, #4]
+        cmp     r3, #0                  @ skip the weight update if the decorr value
+        cmpne   ip, #0                  @  or the original sample is zero
+        beq     .L380
+        teq     ip, r3                  @ update weight based on signs
+        submi   r0, r0, r6              @ signs differ: weight_B -= delta
+        addpl   r0, r0, r6              @ signs match:  weight_B += delta
+        cmp     r0, #1024               @ then clip weight to +/-1024
+        movgt   r0, #1024
+        cmp     r0, r10
+        movlt   r0, r10
+.L380:  ldr     r2, [r1, #0]            @ for left channel the decorrelation value
+        mla     r3, lr, r4, r11         @  is the just updated right sample (in lr)
+        add     r3, r2, r3, asr #10     @ r3 = updated left sample (kept for next pass)
+        str     r3, [r1], #8            @ store it & advance to the next pair
+        cmp     lr, #0                  @ skip the weight update if the decorr value
+        cmpne   r2, #0                  @  or the original sample is zero
+        beq     .L388
+        teq     r2, lr
+        submi   r4, r4, r6              @ signs differ: weight_A -= delta
+        addpl   r4, r4, r6              @ signs match:  weight_A += delta
+        cmp     r4, #1024               @ then clip weight to +/-1024
+        movgt   r4, #1024
+        cmp     r4, r10
+        movlt   r4, r10
+.L388:  cmp     r7, r1                  @ loop back if more samples to do
+        bhi     term_minus_2_loop
+        str     r3, [r5, #40]           @ else store left channel and exit
+        b       common_exit
+/*
+ ******************************************************************************
+ * Loop to handle term = -3 condition
+ *
+ * r0 = dpp->weight_B           r8 = previous left sample
+ * r1 = bptr                    r9 =
+ * r2 = current left sample     r10 = -1024 (for clipping)
+ * r3 = previous right sample   r11 = 512 (for rounding)
+ * r4 = dpp->weight_A           ip = intermediate result
+ * r5 = dpp                     sp =
+ * r6 = dpp->delta              lr =
+ * r7 = eptr                    pc =
+ *
+ * Each channel is predicted from the other channel's sample of the previous
+ * pair: left from the previous right (r3), right from the previous left (r8,
+ * moved into ip before r8 is replaced by the new left sample). In the second
+ * half of the loop r2 is reused for the current right sample. After every
+ * weight update the weight is clipped to the range -1024 ... +1024. When the
+ * loop ends, the last right/left samples are stored at dpp offsets 8 and 40
+ * and execution falls through into common_exit.
+ *******************************************************************************
+ */
+term_minus_3:
+        ldr     r3, [r1, #-4]           @ load previous samples
+        ldr     r8, [r1, #-8]
+term_minus_3_loop:
+        ldr     ip, [r1]                @ ip = original left sample
+        mla     r2, r3, r4, r11         @ decorr value is the previous right sample
+        add     r2, ip, r2, asr #10     @ r2 = updated left sample
+        str     r2, [r1], #4            @ store it & advance to the right word
+        cmp     r3, #0                  @ skip the weight update if the decorr value
+        cmpne   ip, #0                  @  or the original sample is zero
+        beq     .L399
+        teq     ip, r3                  @ update weight based on signs
+        submi   r4, r4, r6              @ signs differ: weight_A -= delta
+        addpl   r4, r4, r6              @ signs match:  weight_A += delta
+        cmp     r4, #1024               @ then clip weight to +/-1024
+        movgt   r4, #1024
+        cmp     r4, r10
+        movlt   r4, r10
+.L399:  movs    ip, r8                  @ ip = previous left we use now
+        mov     r8, r2                  @ r8 = current left we use next time
+        ldr     r2, [r1], #4            @ r2 = original right sample; advance pointer
+        mla     r3, ip, r0, r11         @ weight_B is used for this channel
+        add     r3, r2, r3, asr #10     @ r3 = updated right sample
+        strne   r3, [r1, #-4]           @ store only if decorr value (movs) was non-zero
+        cmpne   r2, #0
+        beq     .L407                   @ skip weight update if either value is 0
+        teq     ip, r2
+        submi   r0, r0, r6              @ signs differ: weight_B -= delta
+        addpl   r0, r0, r6              @ signs match:  weight_B += delta
+        cmp     r0, #1024               @ then clip weight to +/-1024
+        movgt   r0, #1024
+        cmp     r0, r10
+        movlt   r0, r10
+.L407:  cmp     r7, r1                  @ loop back if more samples to do
+        bhi     term_minus_3_loop
+        str     r3, [r5, #8]            @ else store previous samples & exit
+        str     r8, [r5, #40]
+/*
+ * Before finally exiting we must store weights back for next time
+ *
+ * Expects r5 = dpp, r4 = weight_A and r0 = weight_B; only the low 16 bits of
+ * each weight are written (strh). "ldmpc" is not an ARM mnemonic; it is
+ * presumably a macro made available through config.h that reloads the listed
+ * registers together with pc from the stack to undo the stmfd at function
+ * entry -- confirm against its definition.
+ */
+common_exit:
+        strh    r4, [r5, #4]            @ dpp->weight_A = r4
+        strh    r0, [r5, #6]            @ dpp->weight_B = r0
+        ldmpc   regs="r4-r8, r10-r11"

diff --git a/lib/rbcodec/codecs/libwavpack/arm.S b/lib/rbcodec/codecs/libwavpack/arm.S new file mode 100644 index 0000000000..32de1df7de --- /dev/null +++ b/lib/rbcodec/codecs/libwavpack/arm.S
@@ -0,0 +1,477 @@
	1	/***************************************************************************
	2	* __________ __ ___.
	3	* Open \______ \ ____ ____ \| \| _\_ \|__ _______ ___
	4	* Source \| _// _ \_/ ___\\| \|/ /\| __ \ / _ \ \/ /
	5	* Jukebox \| \| ( <_> ) \___\| < \| \_\ ( <_> > < <
	6	* Firmware \|____\|_ /\____/ \___ >__\|_ \\|___ /\____/__/\_ \
	7	* \/ \/ \/ \/ \/
	8	* $Id$
	9	*
	10	* Copyright (C) 2006 by David Bryant
	11	*
	12	* This program is free software; you can redistribute it and/or
	13	* modify it under the terms of the GNU General Public License
	14	* as published by the Free Software Foundation; either version 2
	15	* of the License, or (at your option) any later version.
	16	*
	17	* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
	18	* KIND, either express or implied.
	19	*
	20	****************************************************************************/
	21
	22	/* This is an assembly optimized version of the following WavPack function:
	23	*
	24	* void decorr_stereo_pass_cont_arm (struct decorr_pass *dpp,
	25	* long *buffer, long sample_count);
	26	*
	27	* It performs a single pass of stereo decorrelation on the provided buffer.
	28	* Note that this version of the function requires that the 8 previous stereo
	29	* samples are visible and correct. In other words, it ignores the "samples_*"
	30	* fields in the decorr_pass structure and gets the history data directly
	31	* from the buffer. It does, however, return the appropriate history samples
	32	* to the decorr_pass structure before returning.
	33	*
	34	* This is written to work on an ARM7TDMI processor. This version only uses the
	35	* 32-bit multiply-accumulate instruction and so will overflow with 24-bit
	36	* WavPack files.
	37	*/
	38
	39	#include "config.h"
	40
	41	.text
	42	.align
	43	.global decorr_stereo_pass_cont_arm
	44
	45	/*
	46	* on entry (prologue caches delta/weights, then dispatches on dpp->term):
	47	*
	48	* r0 = struct decorr_pass *dpp
	49	* r1 = long *buffer (history is read from the words just before it)
	50	* r2 = long sample_count (8 bytes are processed per count)
	51	*/
	52
	53	decorr_stereo_pass_cont_arm:
	54
	55	stmfd sp!, {r4 - r8, r10, r11, lr} @ save the registers this routine modifies
	56	mov r5, r0 @ r5 = dpp
	57	mov r11, #512 @ r11 = 512 for rounding
	58	ldrsh r6, [r0, #2] @ r6 = dpp->delta
	59	ldrsh r4, [r0, #4] @ r4 = dpp->weight_A
	60	ldrsh r0, [r0, #6] @ r0 = dpp->weight_B
	61	cmp r2, #0 @ exit if no samples to process
	62	beq common_exit @ (weights get stored back unchanged)
	63
	64	add r7, r1, r2, asl #3 @ r7 = buffer ending position
	65	ldrsh r2, [r5, #0] @ r2 = dpp->term
	66	cmp r2, #0
	67	bmi minus_term @ negative terms are dispatched separately
	68
	69	ldr lr, [r1, #-16] @ load 2 sample history from buffer
	70	ldr r10, [r1, #-12] @ for terms 2, 17, and 18
	71	ldr r8, [r1, #-8] @ (lr/r10 = pair two back, r8/r3 = pair
	72	ldr r3, [r1, #-4] @ one back; lr and r8 hold the first words)
	73	cmp r2, #17
	74	beq term_17_loop
	75	cmp r2, #18
	76	beq term_18_loop
	77	cmp r2, #2
	78	beq term_2_loop
	79	b term_default_loop @ else handle default (1-8, except 2); no range check here
	80
	81	minus_term:
	82	mov r10, #1024 @ r10 = -1024 for weight clipping
	83	rsb r10, r10, #0 @ (only used for negative terms)
	84	cmn r2, #1 @ cmn adds its operand: Z set if term == -1
	85	beq term_minus_1
	86	cmn r2, #2 @ term == -2 ?
	87	beq term_minus_2
	88	cmn r2, #3 @ term == -3 ?
	89	beq term_minus_3
	90	b common_exit @ any other negative term: nothing to process
	91
	92	/*
	93	******************************************************************************
	94	* Loop to handle term = 17 condition (weights are not clipped here)
	95	*
	96	* r0 = dpp->weight_B r8 = previous left sample
	97	* r1 = bptr r9 =
	98	* r2 = current sample r10 = second previous right sample
	99	* r3 = previous right sample r11 = 512 (for rounding)
	100	* r4 = dpp->weight_A ip = current decorrelation value
	101	* r5 = dpp sp =
	102	* r6 = dpp->delta lr = second previous left sample
	103	* r7 = eptr pc =
	104	*******************************************************************************
	105	*/
	106
	107	term_17_loop:
	108	rsbs ip, lr, r8, asl #1 @ decorr value = (2 * prev) - 2nd prev
	109	mov lr, r8 @ previous becomes 2nd previous
	110	ldr r2, [r1], #4 @ get sample & update pointer
	111	mla r8, ip, r4, r11 @ mult decorr value by weight, round,
	112	add r8, r2, r8, asr #10 @ shift, and add to new sample
	113	strne r8, [r1, #-4] @ if change possible, store sample back
	114	cmpne r2, #0 @ Z ends up set if decorr value or sample is 0
	115	beq .L325 @ -> skip the weight update
	116	teq ip, r2 @ update weight based on signs
	117	submi r4, r4, r6 @ signs differ: weight_A -= delta
	118	addpl r4, r4, r6 @ signs match: weight_A += delta
	119
	120	.L325: rsbs ip, r10, r3, asl #1 @ do same thing for right channel
	121	mov r10, r3 @ previous becomes 2nd previous
	122	ldr r2, [r1], #4 @ get sample & update pointer
	123	mla r3, ip, r0, r11 @ weight_B is used for this channel
	124	add r3, r2, r3, asr #10
	125	strne r3, [r1, #-4] @ store only if decorr value was non-zero
	126	cmpne r2, #0
	127	beq .L329 @ skip weight update if either value is 0
	128	teq ip, r2
	129	submi r0, r0, r6 @ signs differ: weight_B -= delta
	130	addpl r0, r0, r6 @ signs match: weight_B += delta
	131
	132	.L329: cmp r7, r1 @ loop back if more samples to do
	133	bhi term_17_loop
	134	b store_1718 @ common exit for terms 17 & 18
	135
	136	/*
	137	******************************************************************************
	138	* Loop to handle term = 18 condition (weights are not clipped here)
	139	*
	140	* r0 = dpp->weight_B r8 = previous left sample
	141	* r1 = bptr r9 =
	142	* r2 = current sample r10 = second previous right sample
	143	* r3 = previous right sample r11 = 512 (for rounding)
	144	* r4 = dpp->weight_A ip = decorrelation value
	145	* r5 = dpp sp =
	146	* r6 = dpp->delta lr = second previous left sample
	147	* r7 = eptr pc =
	148	*******************************************************************************
	149	*/
	150
	151	term_18_loop:
	152	sub ip, r8, lr @ decorr value =
	153	mov lr, r8 @ ((3 * prev) - 2nd prev) >> 1
	154	adds ip, r8, ip, asr #1 @ computed as prev + ((prev - 2nd prev) >> 1)
	155	ldr r2, [r1], #4 @ get sample & update pointer
	156	mla r8, ip, r4, r11 @ mult decorr value by weight, round,
	157	add r8, r2, r8, asr #10 @ shift, and add to new sample
	158	strne r8, [r1, #-4] @ if change possible, store sample back
	159	cmpne r2, #0 @ Z ends up set if decorr value or sample is 0
	160	beq .L337 @ -> skip the weight update
	161	teq ip, r2 @ update weight based on signs
	162	submi r4, r4, r6 @ signs differ: weight_A -= delta
	163	addpl r4, r4, r6 @ signs match: weight_A += delta
	164
	165	.L337: sub ip, r3, r10 @ do same thing for right channel
	166	mov r10, r3 @ previous becomes 2nd previous
	167	adds ip, r3, ip, asr #1 @ sets the flags used by strne/cmpne below
	168	ldr r2, [r1], #4 @ get sample & update pointer
	169	mla r3, ip, r0, r11 @ weight_B is used for this channel
	170	add r3, r2, r3, asr #10
	171	strne r3, [r1, #-4] @ store only if decorr value was non-zero
	172	cmpne r2, #0
	173	beq .L341 @ skip weight update if either value is 0
	174	teq ip, r2
	175	submi r0, r0, r6 @ signs differ: weight_B -= delta
	176	addpl r0, r0, r6 @ signs match: weight_B += delta
	177
	178	.L341: cmp r7, r1 @ loop back if more samples to do
	179	bhi term_18_loop
	180
	181	/* common exit for terms 17 & 18: writes the last two sample pairs into dpp */
	182
	183	store_1718:
	184	str r3, [r5, #40] @ store sample history into struct
	185	str r8, [r5, #8] @ r3/r8 = previous right/left sample
	186	str r10, [r5, #44] @ r10/lr = second previous right/left sample
	187	str lr, [r5, #12]
	188	b common_exit @ and return
	189
	190	/*
	191	******************************************************************************
	192	* Loop to handle term = 2 condition
	193	* (note that this case can be handled by the default term handler (1-8), but
	194	* this special case is faster because it doesn't have to read memory twice)
	195	*
	196	* r0 = dpp->weight_B r8 = previous left sample
	197	* r1 = bptr r9 =
	198	* r2 = current sample r10 = second previous right sample
	199	* r3 = previous right sample r11 = 512 (for rounding)
	200	* r4 = dpp->weight_A ip = decorrelation value
	201	* r5 = dpp sp =
	202	* r6 = dpp->delta lr = second previous left sample
	203	* r7 = eptr pc =
	204	*******************************************************************************
	205	*/
	206
	207	term_2_loop:
	208	movs ip, lr @ get decorrelation value & test
	209	mov lr, r8 @ previous becomes 2nd previous
	210	ldr r2, [r1], #4 @ get sample & update pointer
	211	mla r8, ip, r4, r11 @ mult decorr value by weight, round,
	212	add r8, r2, r8, asr #10 @ shift, and add to new sample
	213	strne r8, [r1, #-4] @ if change possible, store sample back
	214	cmpne r2, #0 @ Z ends up set if decorr value or sample is 0
	215	beq .L225 @ -> skip the weight update
	216	teq ip, r2 @ update weight based on signs
	217	submi r4, r4, r6 @ signs differ: weight_A -= delta
	218	addpl r4, r4, r6 @ signs match: weight_A += delta
	219
	220	.L225: movs ip, r10 @ do same thing for right channel
	221	mov r10, r3 @ previous becomes 2nd previous
	222	ldr r2, [r1], #4 @ get sample & update pointer
	223	mla r3, ip, r0, r11 @ weight_B is used for this channel
	224	add r3, r2, r3, asr #10
	225	strne r3, [r1, #-4] @ store only if decorr value was non-zero
	226	cmpne r2, #0
	227	beq .L229 @ skip weight update if either value is 0
	228	teq ip, r2
	229	submi r0, r0, r6 @ signs differ: weight_B -= delta
	230	addpl r0, r0, r6 @ signs match: weight_B += delta
	231
	232	.L229: cmp r7, r1 @ loop back if more samples to do
	233	bhi term_2_loop
	234	b default_term_exit @ this exit updates all dpp->samples
	235
	236	/*
	237	******************************************************************************
	238	* Loop to handle default term condition (result is always stored; no clipping)
	239	*
	240	* r0 = dpp->weight_B r8 = result accumulator
	241	* r1 = bptr r9 =
	242	* r2 = dpp->term r10 =
	243	* r3 = decorrelation value r11 = 512 (for rounding)
	244	* r4 = dpp->weight_A ip = current sample
	245	* r5 = dpp sp =
	246	* r6 = dpp->delta lr =
	247	* r7 = eptr pc =
	248	*******************************************************************************
	249	*/
	250
	251	term_default_loop:
	252	ldr ip, [r1] @ get original sample
	253	ldr r3, [r1, -r2, asl #3] @ get decorrelation value based on term (term * 8 bytes back)
	254	mla r8, r3, r4, r11 @ mult decorr value by weight, round,
	255	add r8, ip, r8, asr #10 @ shift and add to new sample
	256	str r8, [r1], #4 @ store update sample
	257	cmp r3, #0 @ skip the weight update if the decorr value
	258	cmpne ip, #0 @ or the original sample is zero
	259	beq .L350
	260	teq ip, r3 @ update weight based on signs
	261	submi r4, r4, r6 @ signs differ: weight_A -= delta
	262	addpl r4, r4, r6 @ signs match: weight_A += delta
	263
	264	.L350: ldr ip, [r1] @ do the same thing for right channel
	265	ldr r3, [r1, -r2, asl #3] @ same-channel sample from 'term' pairs ago
	266	mla r8, r3, r0, r11 @ weight_B is used for this channel
	267	add r8, ip, r8, asr #10
	268	str r8, [r1], #4 @ store updated sample & advance pointer
	269	cmp r3, #0
	270	cmpne ip, #0
	271	beq .L354 @ skip weight update if either value is 0
	272	teq ip, r3
	273	submi r0, r0, r6 @ signs differ: weight_B -= delta
	274	addpl r0, r0, r6 @ signs match: weight_B += delta
	275
	276	.L354: cmp r7, r1 @ loop back if more samples to do
	277	bhi term_default_loop
	278
	279	/*
	280	* This exit is used by terms 1-8 to store the previous 8 samples into the decorr
	281	* structure (even if they are not all used for the given term); r1 walks backwards
	282	*/
	283
	284	default_term_exit:
	285	ldrsh r3, [r5, #0] @ reload dpp->term (term_2_loop reuses r2)
	286	sub ip, r3, #1 @ ip = slot index for the newest pair
	287	mov lr, #7 @ lr = passes remaining after the current one
	288
	289	.L358: and r3, ip, #7 @ wrap slot index into 0-7
	290	add r3, r5, r3, asl #2 @ r3 = dpp + 4 * slot
	291	ldr r2, [r1, #-4] @ second word of the pair just before r1
	292	str r2, [r3, #40]
	293	ldr r2, [r1, #-8]! @ first word of that pair; r1 steps back by 8
	294	str r2, [r3, #8]
	295	sub ip, ip, #1 @ next older pair goes to the previous slot
	296	sub lr, lr, #1
	297	cmn lr, #1 @ finished once lr reaches -1 (8 passes)
	298	bne .L358
	299	b common_exit
	300
	301	/*
	302	******************************************************************************
	303	* Loop to handle term = -1 condition (weights are clipped to +/-1024)
	304	*
	305	* r0 = dpp->weight_B r8 =
	306	* r1 = bptr r9 =
	307	* r2 = intermediate result r10 = -1024 (for clipping)
	308	* r3 = previous right sample r11 = 512 (for rounding)
	309	* r4 = dpp->weight_A ip = current sample
	310	* r5 = dpp sp =
	311	* r6 = dpp->delta lr = updated left sample
	312	* r7 = eptr pc =
	313	*******************************************************************************
	314	*/
	315
	316	term_minus_1:
	317	ldr r3, [r1, #-4] @ r3 = right sample of the pair before buffer
	318
	319	term_minus_1_loop:
	320	ldr ip, [r1] @ for left channel the decorrelation value
	321	mla r2, r3, r4, r11 @ is the previous right sample (in r3)
	322	add lr, ip, r2, asr #10 @ lr = updated left sample
	323	str lr, [r1], #8 @ store it & advance to the next pair
	324	cmp r3, #0 @ skip the weight update if the decorr value
	325	cmpne ip, #0 @ or the original sample is zero
	326	beq .L361
	327	teq ip, r3 @ update weight based on signs
	328	submi r4, r4, r6 @ signs differ: weight_A -= delta
	329	addpl r4, r4, r6 @ signs match: weight_A += delta
	330	cmp r4, #1024 @ then clip weight to +/-1024
	331	movgt r4, #1024
	332	cmp r4, r10
	333	movlt r4, r10
	334
	335	.L361: ldr r2, [r1, #-4] @ for right channel the decorrelation value
	336	mla r3, lr, r0, r11 @ is the just updated left sample (in lr)
	337	add r3, r2, r3, asr #10 @ r3 = updated right sample (kept for next pass)
	338	str r3, [r1, #-4]
	339	cmp lr, #0 @ skip the weight update if the decorr value
	340	cmpne r2, #0 @ or the original sample is zero
	341	beq .L369
	342	teq r2, lr
	343	submi r0, r0, r6 @ signs differ: weight_B -= delta
	344	addpl r0, r0, r6 @ signs match: weight_B += delta
	345	cmp r0, #1024 @ then clip weight to +/-1024
	346	movgt r0, #1024
	347	cmp r0, r10
	348	movlt r0, r10
	349
	350	.L369: cmp r7, r1 @ loop back if more samples to do
	351	bhi term_minus_1_loop
	352
	353	str r3, [r5, #8] @ else store right sample and exit
	354	b common_exit
	355
	356	/*
	357	******************************************************************************
	358	* Loop to handle term = -2 condition (weights are clipped to +/-1024)
	359	* (note that the channels are processed in the reverse order here)
	360	*
	361	* r0 = dpp->weight_B r8 =
	362	* r1 = bptr r9 =
	363	* r2 = intermediate result r10 = -1024 (for clipping)
	364	* r3 = previous left sample r11 = 512 (for rounding)
	365	* r4 = dpp->weight_A ip = current sample
	366	* r5 = dpp sp =
	367	* r6 = dpp->delta lr = updated right sample
	368	* r7 = eptr pc =
	369	*******************************************************************************
	370	*/
	371
	372	term_minus_2:
	373	ldr r3, [r1, #-8] @ r3 = left sample of the pair before buffer
	374
	375	term_minus_2_loop:
	376	ldr ip, [r1, #4] @ for right channel the decorrelation value
	377	mla r2, r3, r0, r11 @ is the previous left sample (in r3)
	378	add lr, ip, r2, asr #10 @ lr = updated right sample
	379	str lr, [r1, #4]
	380	cmp r3, #0 @ skip the weight update if the decorr value
	381	cmpne ip, #0 @ or the original sample is zero
	382	beq .L380
	383	teq ip, r3 @ update weight based on signs
	384	submi r0, r0, r6 @ signs differ: weight_B -= delta
	385	addpl r0, r0, r6 @ signs match: weight_B += delta
	386	cmp r0, #1024 @ then clip weight to +/-1024
	387	movgt r0, #1024
	388	cmp r0, r10
	389	movlt r0, r10
	390
	391	.L380: ldr r2, [r1, #0] @ for left channel the decorrelation value
	392	mla r3, lr, r4, r11 @ is the just updated right sample (in lr)
	393	add r3, r2, r3, asr #10 @ r3 = updated left sample (kept for next pass)
	394	str r3, [r1], #8 @ store it & advance to the next pair
	395	cmp lr, #0 @ skip the weight update if the decorr value
	396	cmpne r2, #0 @ or the original sample is zero
	397	beq .L388
	398	teq r2, lr
	399	submi r4, r4, r6 @ signs differ: weight_A -= delta
	400	addpl r4, r4, r6 @ signs match: weight_A += delta
	401	cmp r4, #1024 @ then clip weight to +/-1024
	402	movgt r4, #1024
	403	cmp r4, r10
	404	movlt r4, r10
	405
	406	.L388: cmp r7, r1 @ loop back if more samples to do
	407	bhi term_minus_2_loop
	408
	409	str r3, [r5, #40] @ else store left channel and exit
	410	b common_exit
	411
	412	/*
	413	******************************************************************************
	414	* Loop to handle term = -3 condition (weights are clipped to +/-1024)
	415	*
	416	* r0 = dpp->weight_B r8 = previous left sample
	417	* r1 = bptr r9 =
	418	* r2 = current left sample r10 = -1024 (for clipping)
	419	* r3 = previous right sample r11 = 512 (for rounding)
	420	* r4 = dpp->weight_A ip = intermediate result
	421	* r5 = dpp sp =
	422	* r6 = dpp->delta lr =
	423	* r7 = eptr pc =
	424	*******************************************************************************
	425	*/
	426
	427	term_minus_3:
	428	ldr r3, [r1, #-4] @ load previous samples
	429	ldr r8, [r1, #-8]
	430
	431	term_minus_3_loop:
	432	ldr ip, [r1] @ ip = original left sample
	433	mla r2, r3, r4, r11 @ decorr value is the previous right sample
	434	add r2, ip, r2, asr #10 @ r2 = updated left sample
	435	str r2, [r1], #4 @ store it & advance to the right word
	436	cmp r3, #0 @ skip the weight update if the decorr value
	437	cmpne ip, #0 @ or the original sample is zero
	438	beq .L399
	439	teq ip, r3 @ update weight based on signs
	440	submi r4, r4, r6 @ signs differ: weight_A -= delta
	441	addpl r4, r4, r6 @ signs match: weight_A += delta
	442	cmp r4, #1024 @ then clip weight to +/-1024
	443	movgt r4, #1024
	444	cmp r4, r10
	445	movlt r4, r10
	446
	447	.L399: movs ip, r8 @ ip = previous left we use now
	448	mov r8, r2 @ r8 = current left we use next time
	449	ldr r2, [r1], #4 @ r2 = original right sample; advance pointer
	450	mla r3, ip, r0, r11 @ weight_B is used for this channel
	451	add r3, r2, r3, asr #10 @ r3 = updated right sample
	452	strne r3, [r1, #-4] @ store only if decorr value (movs) was non-zero
	453	cmpne r2, #0
	454	beq .L407 @ skip weight update if either value is 0
	455	teq ip, r2
	456	submi r0, r0, r6 @ signs differ: weight_B -= delta
	457	addpl r0, r0, r6 @ signs match: weight_B += delta
	458	cmp r0, #1024 @ then clip weight to +/-1024
	459	movgt r0, #1024
	460	cmp r0, r10
	461	movlt r0, r10
	462
	463	.L407: cmp r7, r1 @ loop back if more samples to do
	464	bhi term_minus_3_loop
	465
	466	str r3, [r5, #8] @ else store previous samples & exit
	467	str r8, [r5, #40]
	468
	469	/*
	470	* Before finally exiting we must store weights back for next time (r5 = dpp)
	471	*/
	472
	473	common_exit:
	474	strh r4, [r5, #4] @ dpp->weight_A = r4 (low 16 bits)
	475	strh r0, [r5, #6] @ dpp->weight_B = r0 (low 16 bits)
	476	ldmpc regs="r4-r8, r10-r11"
	477