1 files changed, 128 insertions, 0 deletions
diff --git a/apps/codecs/libmad/imdct_mcf5249.S b/apps/codecs/libmad/imdct_mcf5249.S
new file mode 100644
index 0000000000..be0072f674
--- /dev/null
+++ b/apps/codecs/libmad/imdct_mcf5249.S
@@ -0,0 +1,128 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2005 by Thom Johansen
+ *
+ * All files in this archive are subject to the GNU General Public License.
+ * See the file COPYING in the source tree root for full license agreement.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+/* this will also be the home to III_imdct_l in the future */
+/*
+ * III_imdct_s
+ *
+ * IMDCT followed by a windowing/overlap stage, written for the ColdFire
+ * multiply-accumulate unit (uses %macsr, %acc0, mac.l and movclr.l).
+ * Presumably the short-block counterpart of the III_imdct_l mentioned
+ * above - confirm against the C caller.
+ *
+ * Stack arguments:
+ *   first  argument: X - input pointer; 3 groups of 6 long words are read
+ *   second argument: z - output pointer; 36 long words are written
+ *
+ * External data: imdct_s (IMDCT coefficients) and window_s (window
+ * coefficients); neither is defined in this file.
+ *
+ * Registers: d2-d7/a2-a4 are saved on entry and restored on exit.
+ * d0, d1, a0, a1 and acc0 are used without being preserved, and macsr is
+ * left set to 0xb0 on return.
+ */
+    .global III_imdct_s
+III_imdct_s:
+    /* we need to save 9 registers and 36 samples of temp buffer.
+     * Frame layout after the lea.l below (offsets from sp):
+     *    0*4 .. 35*4   temp buffer (yptr), 12 long words per outer iteration
+     *   36*4 .. 44*4   saved d2-d7/a2-a4
+     *   45*4 + 4       first stack argument (X)
+     *   45*4 + 8       second stack argument (z)
+     */
+    lea.l (-45*4, %sp), %sp
+    movem.l %d2-%d7/%a2-%a4, (36*4, %sp)
+    move.l (45*4 + 4, %sp), %a2  /* a2 = X */
+    move.l %sp, %a3              /* a3 = yptr, start of the temp buffer */
+  
+    /* IMDCT */
+    /* if additional precision is needed in this block, it is possible to
+     * get more low bits out of the accext01 register _before_ doing the
+     * movclrs.
+     */
+    /* NOTE(review): nothing here clears acc0 before the first mac.l, so it
+     * looks like acc0 is expected to be zero on entry - confirm.
+     */
+    move.l #0xb0, %macsr                /* frac mode, saturation, rounding */
+    sub.l %a0, %a0                      /* clear loop variable */
+    .align 2
+.imdctloop:                             /* outer loop label */
+    /* one pass per group of 6 input samples; a0 counts the passes */
+    lea.l imdct_s, %a1                  /* load pointer to imdct coefs in a1 */
+    movem.l (%a2), %d0-%d5              /* load some input data in d0-d5 */
+    lea.l (6*4, %a2), %a2               /* advance X past the 6 samples just read */
+    clr.l %d7                           /* clear loop variable */
+    /* Each mac.l below multiplies a sample (d0-d5) by the coefficient that
+     * is already in a4, accumulates into acc0, and fetches the next
+     * coefficient from (a1)+ into a4 for the following mac.l.  a4 therefore
+     * has to be primed with the first coefficient before the loop starts.
+     */
+    move.l (%a1)+, %a4                  /* load imdct coef in a4 */
+    .align 2
+.macloop:                               /* inner loop label */
+    mac.l %d0, %a4, (%a1)+, %a4, %acc0  /* mac sequence */
+    mac.l %d1, %a4, (%a1)+, %a4, %acc0
+    mac.l %d2, %a4, (%a1)+, %a4, %acc0
+    mac.l %d3, %a4, (%a1)+, %a4, %acc0
+    mac.l %d4, %a4, (%a1)+, %a4, %acc0
+    mac.l %d5, %a4, (%a1)+, %a4, %acc0
+    movclr.l %acc0, %d6                 /* get result, left shifted once */
+    asl.l #3, %d6                       /* one shift free, shift three more */
+    move.l %d6, (%a3, %d7.l*4)          /* yptr[i] = result */
+    neg.l %d6                           /* d6 = -result */
+    neg.l %d7                           /* d7 = -i, so the index counts downwards */
+    move.l %d6, (5*4, %a3, %d7.l*4)     /* yptr[5 - i] = -result */
+    mac.l %d0, %a4, (%a1)+, %a4, %acc0  /* mac sequence */
+    mac.l %d1, %a4, (%a1)+, %a4, %acc0
+    mac.l %d2, %a4, (%a1)+, %a4, %acc0
+    mac.l %d3, %a4, (%a1)+, %a4, %acc0
+    mac.l %d4, %a4, (%a1)+, %a4, %acc0
+    mac.l %d5, %a4, (%a1)+, %a4, %acc0
+    movclr.l %acc0, %d6                 /* get result */
+    asl.l #3, %d6
+    move.l %d6, (11*4, %a3, %d7.l*4)    /* yptr[11 - i] = result */
+    neg.l %d7                           /* d7 = i again */
+    move.l %d6, (6*4, %a3, %d7.l*4)     /* yptr[i + 6] = result */
+    addq.l #1, %d7                      /* increment inner loop variable */
+    moveq.l #3, %d6                     /* result is stored, d6 is free for the limit */
+    cmp.l %d6, %d7                      /* we do three inner loop iterations */
+    jne .macloop
+    /* note: a4 is primed once and every mac.l fetches one more coefficient,
+     * so 1 + 3*12 = 37 long words are read from imdct_s per outer pass; the
+     * last one fetched is never multiplied.
+     */
+    lea.l (12*4, %a3), %a3              /* add pointer increment */
+    addq.l #1, %a0                      /* increment outer loop variable */
+    moveq.l #3, %d0                     /* d0 is reloaded by the movem at the loop top */
+    cmp.l %d0, %a0                      /* we do three outer loop iterations */
+    jne .imdctloop
+    /* windowing, overlapping and concatenation */
+    /* Per iteration i (0..5), with y = temp buffer and w = window_s:
+     *   z[i]      = 0
+     *   z[i + 6]  = w[i]     * y[i]
+     *   z[i + 12] = w[i + 6] * y[i + 6]  + w[i] * y[i + 12]
+     *   z[i + 18] = w[i + 6] * y[i + 18] + w[i] * y[i + 24]
+     *   z[i + 24] = w[i + 6] * y[i + 30]
+     *   z[i + 30] = 0
+     * Every sum is fetched with movclr.l and shifted left by 3, exactly as
+     * in the IMDCT loop.
+     */
+    move.l (45*4 + 8, %sp), %a2       /* a2 = z */
+    move.l %sp, %a3                   /* a3 = tmp buffer ptr */
+    lea.l window_s, %a4               /* a4 = window coef pointer */
+    moveq.l #6, %d7                   /* six iterations */
+    .align 2
+.overlaploop:
+    clr.l (%a2)                       /* z[i + 0] = 0 */
+    move.l (%a4), %d0                 /* d0 = w[i] */
+    move.l (%a3), %d2                 /* d2 = y[i] */
+    mac.l %d0, %d2, (6*4, %a4), %d1, %acc0 /* w[i] * y[i]; d1 = w[i + 6] */
+    move.l (6*4, %a3), %d2            /* d2 = y[i + 6] */
+    movclr.l %acc0, %d6
+    asl.l #3, %d6
+    move.l %d6, (6*4, %a2)            /* z[i + 6] = result */
+    mac.l %d1, %d2, (12*4, %a3), %d2, %acc0 /* w[i + 6] * y[i + 6]; d2 = y[i + 12] */
+    mac.l %d0, %d2, (18*4, %a3), %d2, %acc0 /* + w[i] * y[i + 12]; d2 = y[i + 18] */
+    movclr.l %acc0, %d6
+    asl.l #3, %d6
+    move.l %d6, (12*4, %a2)           /* z[i + 12] = result */
+    mac.l %d1, %d2, (24*4, %a3), %d2, %acc0 /* w[i + 6] * y[i + 18]; d2 = y[i + 24] */
+    mac.l %d0, %d2, (30*4, %a3), %d2, %acc0 /* + w[i] * y[i + 24]; d2 = y[i + 30] */
+    movclr.l %acc0, %d6
+    asl.l #3, %d6
+    move.l %d6, (18*4, %a2)           /* z[i + 18] = result */
+    mac.l %d1, %d2, %acc0             /* w[i + 6] * y[i + 30] */
+    movclr.l %acc0, %d6
+    asl.l #3, %d6
+    move.l %d6, (24*4, %a2)           /* z[i + 24] = result */
+    clr.l (30*4, %a2)                 /* z[i + 30] = 0 */
+    addq.l #4, %a2                    /* increment all pointers */
+    addq.l #4, %a3
+    addq.l #4, %a4
+    subq.l #1, %d7                    /* decrement loop counter */
+    jne .overlaploop
+    /* fall through to exit if we're done */
+    
+    /* clean up */
+    movem.l (36*4, %sp), %d2-%d7/%a2-%a4  /* restore the registers saved on entry */
+    lea.l (45*4, %sp), %sp                /* drop temp buffer and save area */
+    rts

diff --git a/apps/codecs/libmad/imdct_mcf5249.S b/apps/codecs/libmad/imdct_mcf5249.S
new file mode 100644
index 0000000000..be0072f674
--- /dev/null
+++ b/apps/codecs/libmad/imdct_mcf5249.S
@@ -0,0 +1,128 @@
	1	/***************************************************************************
	2	 *             __________               __   ___.
	3	 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
	4	 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
	5	 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
	6	 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
	7	 *                     \/            \/     \/    \/            \/
	8	 * $Id$
	9	 *
	10	 * Copyright (C) 2005 by Thom Johansen
	11	 *
	12	 * All files in this archive are subject to the GNU General Public License.
	13	 * See the file COPYING in the source tree root for full license agreement.
	14	 *
	15	 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
	16	 * KIND, either express or implied.
	17	 *
	18	 ****************************************************************************/
	19	/* this will also be the home to III_imdct_l in the future */
	20	/* III_imdct_s: stack args are X (input, 3 x 6 long words read) and z (output, 36 long words written); d2-d7/a2-a4 are preserved, d0-d1/a0-a1/acc0 are not, macsr is left at 0xb0; imdct_s and window_s are defined elsewhere */
	21	.global III_imdct_s
	22	III_imdct_s:
	23	/* we need to save 9 registers and 36 samples of temp buffer: temp buffer at 0*4..35*4(sp), saved registers at 36*4..44*4(sp) */
	24	lea.l (-45*4, %sp), %sp
	25	movem.l %d2-%d7/%a2-%a4, (36*4, %sp)
	26	move.l (45*4 + 4, %sp), %a2 /* a2 = X */
	27	move.l %sp, %a3 /* a3 = yptr, start of the temp buffer */
	28
	29	/* IMDCT */
	30	/* NOTE(review): nothing here clears acc0 before the first mac.l, so it looks like acc0 is expected to be zero on entry - confirm */
	31	/* if additional precision is needed in this block, it is possible to
	32	 * get more low bits out of the accext01 register _before_ doing the
	33	 * movclrs.
	34	 */
	35	move.l #0xb0, %macsr /* frac mode, saturation, rounding */
	36	sub.l %a0, %a0 /* clear loop variable */
	37	.align 2
	38	.imdctloop: /* outer loop label */
	39	lea.l imdct_s, %a1 /* load pointer to imdct coefs in a1 */
	40	movem.l (%a2), %d0-%d5 /* load some input data in d0-d5 */
	41	lea.l (6*4, %a2), %a2 /* advance X past the 6 samples just read */
	42	/* each mac.l uses the coef already in a4 and fetches the next one from (a1)+, so a4 is primed before the loop */
	43	clr.l %d7 /* clear loop variable */
	44	move.l (%a1)+, %a4 /* load imdct coef in a4 */
	45	.align 2
	46	.macloop: /* inner loop label */
	47	mac.l %d0, %a4, (%a1)+, %a4, %acc0 /* mac sequence */
	48	mac.l %d1, %a4, (%a1)+, %a4, %acc0
	49	mac.l %d2, %a4, (%a1)+, %a4, %acc0
	50	mac.l %d3, %a4, (%a1)+, %a4, %acc0
	51	mac.l %d4, %a4, (%a1)+, %a4, %acc0
	52	mac.l %d5, %a4, (%a1)+, %a4, %acc0
	53	movclr.l %acc0, %d6 /* get result, left shifted once */
	54	asl.l #3, %d6 /* one shift free, shift three more */
	55	move.l %d6, (%a3, %d7.l*4) /* yptr[i] = result */
	56	neg.l %d6 /* d6 = -result */
	57	neg.l %d7 /* d7 = -i, so the index counts downwards */
	58	move.l %d6, (5*4, %a3, %d7.l*4) /* yptr[5 - i] = -result */
	59	mac.l %d0, %a4, (%a1)+, %a4, %acc0 /* mac sequence */
	60	mac.l %d1, %a4, (%a1)+, %a4, %acc0
	61	mac.l %d2, %a4, (%a1)+, %a4, %acc0
	62	mac.l %d3, %a4, (%a1)+, %a4, %acc0
	63	mac.l %d4, %a4, (%a1)+, %a4, %acc0
	64	mac.l %d5, %a4, (%a1)+, %a4, %acc0
	65	movclr.l %acc0, %d6 /* get result */
	66	asl.l #3, %d6
	67	move.l %d6, (11*4, %a3, %d7.l*4) /* yptr[11 - i] = result */
	68	neg.l %d7 /* d7 = i again */
	69	move.l %d6, (6*4, %a3, %d7.l*4) /* yptr[i + 6] = result */
	70	addq.l #1, %d7 /* increment inner loop variable */
	71	moveq.l #3, %d6 /* result is stored, d6 is free for the limit */
	72	cmp.l %d6, %d7 /* we do three inner loop iterations */
	73	jne .macloop
	74	/* note: 1 + 3*12 = 37 long words are fetched from imdct_s per outer pass; the last one is never multiplied */
	75	lea.l (12*4, %a3), %a3 /* add pointer increment */
	76	addq.l #1, %a0 /* increment outer loop variable */
	77	moveq.l #3, %d0 /* d0 is reloaded by the movem at the loop top */
	78	cmp.l %d0, %a0 /* we do three outer loop iterations */
	79	jne .imdctloop
	80
	81	/* windowing, overlapping and concatenation */
	82	/* per iteration i = 0..5, with y = temp buffer and w = window_s; every sum is fetched with movclr.l and shifted left by 3 as above */
	83	move.l (45*4 + 8, %sp), %a2 /* a2 = z */
	84	move.l %sp, %a3 /* a3 = tmp buffer ptr */
	85	lea.l window_s, %a4 /* a4 = window coef pointer */
	86
	87	moveq.l #6, %d7 /* six iterations */
	88	.align 2
	89	.overlaploop:
	90	clr.l (%a2) /* z[i + 0] = 0 */
	91	move.l (%a4), %d0 /* d0 = w[i] */
	92	move.l (%a3), %d2 /* d2 = y[i] */
	93	mac.l %d0, %d2, (6*4, %a4), %d1, %acc0 /* w[i] * y[i]; d1 = w[i + 6] */
	94	move.l (6*4, %a3), %d2 /* d2 = y[i + 6] */
	95	movclr.l %acc0, %d6
	96	asl.l #3, %d6
	97	move.l %d6, (6*4, %a2) /* z[i + 6] = result */
	98	/* z[i + 12] = w[i + 6] * y[i + 6] + w[i] * y[i + 12] */
	99	mac.l %d1, %d2, (12*4, %a3), %d2, %acc0
	100	mac.l %d0, %d2, (18*4, %a3), %d2, %acc0
	101	movclr.l %acc0, %d6
	102	asl.l #3, %d6
	103	move.l %d6, (12*4, %a2) /* z[i + 12] = result */
	104	/* z[i + 18] = w[i + 6] * y[i + 18] + w[i] * y[i + 24] */
	105	mac.l %d1, %d2, (24*4, %a3), %d2, %acc0
	106	mac.l %d0, %d2, (30*4, %a3), %d2, %acc0
	107	movclr.l %acc0, %d6
	108	asl.l #3, %d6
	109	move.l %d6, (18*4, %a2) /* z[i + 18] = result */
	110	/* z[i + 24] = w[i + 6] * y[i + 30] */
	111	mac.l %d1, %d2, %acc0
	112	movclr.l %acc0, %d6
	113	asl.l #3, %d6
	114	move.l %d6, (24*4, %a2) /* z[i + 24] = result */
	115
	116	clr.l (30*4, %a2) /* z[i + 30] = 0 */
	117	addq.l #4, %a2 /* increment all pointers */
	118	addq.l #4, %a3
	119	addq.l #4, %a4
	120	subq.l #1, %d7 /* decrement loop counter */
	121	jne .overlaploop
	122	/* fall through to exit if we're done */
	123
	124	/* clean up */
	125	movem.l (36*4, %sp), %d2-%d7/%a2-%a4
	126	lea.l (45*4, %sp), %sp
	127	rts
	128