Diffstat (limited to 'lib/rbcodec/codecs/libtta/filter_coldfire.S')
-rw-r--r--  lib/rbcodec/codecs/libtta/filter_coldfire.S  164
1 file changed, 164 insertions(+), 0 deletions(-)
diff --git a/lib/rbcodec/codecs/libtta/filter_coldfire.S b/lib/rbcodec/codecs/libtta/filter_coldfire.S
new file mode 100644
index 0000000000..3950eb52e6
--- /dev/null
+++ b/lib/rbcodec/codecs/libtta/filter_coldfire.S
@@ -0,0 +1,164 @@
/***************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                     \/            \/     \/    \/            \/
 * $Id$
 *
 * Copyright (C) 2010 Nils Wallménius
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ****************************************************************************/

#include "config.h"

/*
 * The following is an assembler optimised version of
 * void hybrid_filter(fltst *fs, int *in)
 */
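
/*
 * For reference, a rough C sketch of what this routine computes. This is
 * NOT the exact Rockbox C source: the fltst layout is inferred from the
 * offsets used below, the names "pad" and "qm" are guesses, and dl/dx/pA/
 * pB/pM follow the comments in the code.
 *
 *  typedef struct {
 *      int32_t index;   // offset   0: rotating offset into dx/dl (0..15)
 *      int32_t error;   // offset   4: previous raw input sample
 *      int32_t round;   // offset   8
 *      int32_t shift;   // offset  12
 *      int32_t pad;     // offset  16: not touched here (a guess)
 *      int32_t qm[8];   // offset  20: adaptive coefficients   (*pB)
 *      int32_t dx[24];  // offset  52: adaptation steps        (*pM)
 *      int32_t dl[24];  // offset 148: difference history      (*pA)
 *  } fltst;
 *
 *  void hybrid_filter(fltst *fs, int *in)
 *  {
 *      int32_t *pA = fs->dl + fs->index;
 *      int32_t *pM = fs->dx + fs->index;
 *      int32_t *pB = fs->qm;
 *      int32_t sum = fs->round;
 *      int i;
 *
 *      // sign-sign adaptation: step each coefficient by +-pM[i]
 *      if (fs->error < 0)
 *          for (i = 0; i < 8; i++) pB[i] -= pM[i];
 *      else if (fs->error > 0)
 *          for (i = 0; i < 8; i++) pB[i] += pM[i];
 *
 *      for (i = 0; i < 8; i++)          // 8-tap dot product
 *          sum += pA[i] * pB[i];
 *
 *      // next adaptation steps: sign (+-1) of the newest differences,
 *      // weighted 4/2/2/1 towards the most recent tap
 *      pM[8] = ((pA[7] >> 30) | 1) << 2;
 *      pM[7] = ((pA[6] >> 30) | 1) << 1;
 *      pM[6] = ((pA[5] >> 30) | 1) << 1;
 *      pM[5] =  (pA[4] >> 30) | 1;
 *
 *      fs->error = *in;                 // remember the raw input
 *      *in += sum >> fs->shift;         // add the prediction
 *
 *      pA[8] = *in;                     // append the decoded sample and
 *      pA[7] = pA[8] - pA[7];           // re-difference the tail
 *      pA[6] = pA[7] - pA[6];
 *      pA[5] = pA[6] - pA[5];
 *
 *      // slide the 24-entry windows back every 16 samples instead of
 *      // moving the whole history on every call
 *      if (++fs->index == 16) {
 *          memcpy(fs->dl, fs->dl + 16, 8 * sizeof(int32_t));
 *          memcpy(fs->dx, fs->dx + 16, 8 * sizeof(int32_t));
 *          fs->index = 0;
 *      }
 *  }
 */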

#if defined(USE_IRAM)
    .section .icode
#else
    .text
#endif
    .align 2
    .global hybrid_filter
    .type hybrid_filter, @function

hybrid_filter:
    lea.l (-8*4, %sp), %sp
    movem.l %d2-%d7/%a2-%a3, (%sp)      | save some registers
    move.l (8*4+4, %sp), %a0            | a0 = fs
    movem.l (%a0), %d4-%d5              | d4 = fs->index, d5 = fs->error

    lea.l (%a0, %d4.l*4), %a2
    lea.l (148, %a2), %a1               | a1 = fs->dl + fs->index (*pA)
    lea.l (52, %a2), %a2                | a2 = fs->dx + fs->index (*pM)
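    | pA walks the 24-entry difference history and pM the matching
    | adaptation steps; both start fs->index words into their buffers
    | (148 and 52 are the dl/dx offsets assumed in the C sketch above)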

    move.l (%a1)+, %a3                  | load one value from *pA (needed in every case)
    movem.l (20, %a0), %d0-%d3          | load 4 values from *pB

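    | the sign of the previous error selects one of three paths: leave the
    | coefficients in *pB alone, step them down by *pM, or step them up by
    | *pM before the dot product (a sign-sign LMS style update)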
    tst.l %d5
    blt .hf_negative
    bgt .hf_positive

    | fs->error == 0
    mac.l %d0, %a3, (%a1)+, %a3, %acc0
    mac.l %d1, %a3, (%a1)+, %a3, %acc0
    mac.l %d2, %a3, (%a1)+, %a3, %acc0
    mac.l %d3, %a3, (%a1)+, %d4, %acc0
    movem.l (4*4+20, %a0), %d0-%d3      | load 4 values from *pB
    bra 0f

.hf_negative:                           | fs->error < 0
    movem.l (%a2), %d4-%d7              | load 4 values from *pM
    sub.l %d4, %d0
    sub.l %d5, %d1
    sub.l %d6, %d2
    sub.l %d7, %d3
    movem.l %d0-%d3, (20, %a0)          | store updated coefficients back to *pB
    mac.l %d0, %a3, (%a1)+, %a3, %acc0
    mac.l %d1, %a3, (%a1)+, %a3, %acc0
    mac.l %d2, %a3, (%a1)+, %a3, %acc0
    mac.l %d3, %a3, (%a1)+, %d4, %acc0

    movem.l (4*4+20, %a0), %d0-%d3      | load 4 values from *pB
    movem.l (4*4, %a2), %d5-%d7/%a3     | load 4 values from *pM
    sub.l %d5, %d0
    sub.l %d6, %d1
    sub.l %d7, %d2
    sub.l %a3, %d3
    movem.l %d0-%d3, (4*4+20, %a0)      | store updated coefficients back to *pB
    bra 0f

.hf_positive:                           | fs->error > 0
    movem.l (%a2), %d4-%d7              | load 4 values from *pM
    add.l %d4, %d0
    add.l %d5, %d1
    add.l %d6, %d2
    add.l %d7, %d3
    movem.l %d0-%d3, (20, %a0)          | store updated coefficients back to *pB
    mac.l %d0, %a3, (%a1)+, %a3, %acc0
    mac.l %d1, %a3, (%a1)+, %a3, %acc0
    mac.l %d2, %a3, (%a1)+, %a3, %acc0
    mac.l %d3, %a3, (%a1)+, %d4, %acc0

    movem.l (4*4+20, %a0), %d0-%d3      | load 4 values from *pB
    movem.l (4*4, %a2), %d5-%d7/%a3     | load 4 values from *pM
    add.l %d5, %d0
    add.l %d6, %d1
    add.l %d7, %d2
    add.l %a3, %d3
    movem.l %d0-%d3, (4*4+20, %a0)      | store updated coefficients back to *pB

0:

    mac.l %d0, %d4, (%a1)+, %d5, %acc0  | common mac block, shared by all three paths
    mac.l %d1, %d5, (%a1)+, %d6, %acc0
    mac.l %d2, %d6, (%a1), %d7, %acc0
    mac.l %d3, %d7, %acc0
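    | acc0 now holds the full 8-tap dot product; d4-d7 still hold the
    | (not yet re-differenced) history values pA[4]..pA[7] for reuse below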

    move.l (8*4+8, %sp), %a3            | a3 = in
    move.l (%a3), %d3
    move.l %d3, (4, %a0)                | fs->error = *in
    movclr.l %acc0, %d0                 | d0 = sum
    movem.l (8, %a0), %d1-%d2           | d1 = fs->round, d2 = fs->shift
    add.l %d1, %d0                      | sum += fs->round
    asr.l %d2, %d0                      | sum >>= fs->shift

    add.l %d0, %d3
    move.l %d3, (%a3)                   | *in += (sum >> fs->shift)

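    | append the decoded sample and re-difference the tail of the history;
    | d5-d7 still hold the old pA[5]..pA[7] loaded in the common mac block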
    move.l %d3, ( 1*4, %a1)             | pA[8] = *in
    sub.l %d7, %d3
    move.l %d3, ( 0*4, %a1)             | pA[7] = pA[8] - pA[7]
    sub.l %d6, %d3
    move.l %d3, (-1*4, %a1)             | pA[6] = pA[7] - pA[6]
    sub.l %d5, %d3
    move.l %d3, (-2*4, %a1)             | pA[5] = pA[6] - pA[5]

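    | derive the next call's adaptation steps from the old differences:
    | (x >> 30) | 1 reduces x to (roughly) its sign, +-1, which is then
    | weighted 4/2/2/1 towards the newest tap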
    moveq #30, %d0
    asr.l %d0, %d7
    asr.l %d0, %d6
    asr.l %d0, %d5
    asr.l %d0, %d4

    moveq #1, %d0
    or.l %d0, %d7
    or.l %d0, %d6
    or.l %d0, %d5
    or.l %d0, %d4

    lsl.l #2, %d7
    lsl.l #1, %d6
    lsl.l #1, %d5
    movem.l %d4-%d7, (8*4-3*4, %a2)     | store to *pM (dx[index+5..index+8])

    move.l (%a0), %d0
    addq.l #1, %d0
    cmp.l #16, %d0                      | ++fs->index == 16 ?
    bne 1f

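    | every 16th call, slide dl[16..23] and dx[16..23] down to the start of
    | their 24-entry buffers and reset the index, presumably to avoid
    | shifting the whole history on every sample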
    movem.l (16*4+148, %a0), %d0-%d7    | copy dl[16..23] ...
    movem.l %d0-%d7, (148, %a0)         | ... to dl[0..7]
    movem.l (16*4+52, %a0), %d0-%d7     | copy dx[16..23] ...
    movem.l %d0-%d7, (52, %a0)          | ... to dx[0..7]
    clr.l %d0                           | fs->index = 0
1:

    move.l %d0, (%a0)                   | store fs->index

    movem.l (%sp), %d2-%d7/%a2-%a3      | restore stacked regs
    lea.l (8*4, %sp), %sp
    rts