From a4cad3d92684187d37c4034cbe185184719baaca Mon Sep 17 00:00:00 2001
From: Nils Wallménius
Date: Mon, 12 Jul 2010 16:14:32 +0000
Subject: Coldfire assembler implementation of hybrid_filter for libtta.
 Speeds up decoding on h300 by 4.2MHz. Set svn properties.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27404 a1c6a512-1295-4272-9138-f99709370657
---
 apps/codecs/libtta/SOURCES           |   3 +
 apps/codecs/libtta/filter.h          |   2 +-
 apps/codecs/libtta/filter_coldfire.S | 164 +++++++++++++++++++++++++++++++++++
 apps/codecs/libtta/ttadec.c          |   4 +
 4 files changed, 172 insertions(+), 1 deletion(-)
 create mode 100644 apps/codecs/libtta/filter_coldfire.S

(limited to 'apps/codecs')

diff --git a/apps/codecs/libtta/SOURCES b/apps/codecs/libtta/SOURCES
index 35f2660dd7..0a8f1171eb 100644
--- a/apps/codecs/libtta/SOURCES
+++ b/apps/codecs/libtta/SOURCES
@@ -2,3 +2,6 @@ ttadec.c
 #ifdef CPU_ARM
 filter_arm.S
 #endif
+#ifdef CPU_COLDFIRE
+filter_coldfire.S
+#endif
diff --git a/apps/codecs/libtta/filter.h b/apps/codecs/libtta/filter.h
index 6eef6dcf42..228757b9a0 100644
--- a/apps/codecs/libtta/filter.h
+++ b/apps/codecs/libtta/filter.h
@@ -42,7 +42,7 @@
 ///////// Filter Settings //////////
 static int flt_set[3] = {10, 9, 10};
 
-#ifdef CPU_ARM
+#if defined(CPU_ARM) || defined(CPU_COLDFIRE)
 int hybrid_filter(fltst *fs, int *in); /* implements in filter_arm.S */
 
 #else
diff --git a/apps/codecs/libtta/filter_coldfire.S b/apps/codecs/libtta/filter_coldfire.S
new file mode 100644
index 0000000000..3950eb52e6
--- /dev/null
+++ b/apps/codecs/libtta/filter_coldfire.S
@@ -0,0 +1,164 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2010 Nils Wallménius
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include "config.h"
+
+/*
+ * The following is an assembler optimised version of
+ * void hybrid_filter(fltst *fs, int *in)
+ */
+
+#if defined(USE_IRAM)
+    .section .icode
+#else
+    .text
+#endif
+    .align 2
+    .global hybrid_filter
+    .type   hybrid_filter, @function
+
+hybrid_filter:
+    lea.l (-8*4, %sp), %sp                  | make room on the stack for 8 registers
+    movem.l %d2-%d7/%a2-%a3, (%sp)          | save some registers
+    move.l (8*4+4, %sp), %a0                | a0 = fs
+    movem.l (%a0), %d4-%d5                  | d4 = fs->index, d5 = fs->error
+
+    lea.l (%a0, %d4.l*4), %a2               | a2 = (char *)fs + 4 * fs->index
+    lea.l (148, %a2), %a1                   | a1 = fs->dl + fs->index (*pA)
+    lea.l (52, %a2), %a2                    | a2 = fs->dx + fs->index (*pM)
+
+    move.l (%a1)+, %a3                      | load one value from *pA (needed in every case)
+    movem.l (20, %a0), %d0-%d3              | load 4 values from *pB
+
+    tst.l %d5                               | pick a path from the sign of fs->error
+    blt .hf_negative
+    bgt .hf_positive
+
+                                            | fs->error == 0: *pB is used unchanged
+    mac.l %d0, %a3, (%a1)+, %a3, %acc0      | acc0 += pB[0] * pA[0], a3 = pA[1]
+    mac.l %d1, %a3, (%a1)+, %a3, %acc0      | acc0 += pB[1] * pA[1], a3 = pA[2]
+    mac.l %d2, %a3, (%a1)+, %a3, %acc0      | acc0 += pB[2] * pA[2], a3 = pA[3]
+    mac.l %d3, %a3, (%a1)+, %d4, %acc0      | acc0 += pB[3] * pA[3], d4 = pA[4]
+    movem.l (4*4+20, %a0), %d0-%d3          | load 4 values from *pB
+    bra 0f
+
+    .hf_negative:                           | fs->error < 0
+    movem.l (%a2), %d4-%d7                  | load 4 values from *pM
+    sub.l %d4, %d0                          | pB[0..3] -= pM[0..3]
+    sub.l %d5, %d1
+    sub.l %d6, %d2
+    sub.l %d7, %d3
+    movem.l %d0-%d3, (20, %a0)              | store the updated pB[0..3]
+    mac.l %d0, %a3, (%a1)+, %a3, %acc0      | acc0 += pB[0] * pA[0], a3 = pA[1]
+    mac.l %d1, %a3, (%a1)+, %a3, %acc0      | acc0 += pB[1] * pA[1], a3 = pA[2]
+    mac.l %d2, %a3, (%a1)+, %a3, %acc0      | acc0 += pB[2] * pA[2], a3 = pA[3]
+    mac.l %d3, %a3, (%a1)+, %d4, %acc0      | acc0 += pB[3] * pA[3], d4 = pA[4]
+
+    movem.l (4*4+20, %a0), %d0-%d3          | load 4 values from *pB
+    movem.l (4*4, %a2), %d5-%d7/%a3         | load 4 values from *pM (d4 holds pA[4], so a3 is used)
+    sub.l %d5, %d0                          | pB[4..7] -= pM[4..7]
+    sub.l %d6, %d1
+    sub.l %d7, %d2
+    sub.l %a3, %d3
+    movem.l %d0-%d3, (4*4+20, %a0)          | store the updated pB[4..7]
+    bra 0f
+
+    .hf_positive:                           | fs->error > 0
+    movem.l (%a2), %d4-%d7                  | load 4 values from *pM
+    add.l %d4, %d0                          | pB[0..3] += pM[0..3]
+    add.l %d5, %d1
+    add.l %d6, %d2
+    add.l %d7, %d3
+    movem.l %d0-%d3, (20, %a0)              | store the updated pB[0..3]
+    mac.l %d0, %a3, (%a1)+, %a3, %acc0      | acc0 += pB[0] * pA[0], a3 = pA[1]
+    mac.l %d1, %a3, (%a1)+, %a3, %acc0      | acc0 += pB[1] * pA[1], a3 = pA[2]
+    mac.l %d2, %a3, (%a1)+, %a3, %acc0      | acc0 += pB[2] * pA[2], a3 = pA[3]
+    mac.l %d3, %a3, (%a1)+, %d4, %acc0      | acc0 += pB[3] * pA[3], d4 = pA[4]
+
+    movem.l (4*4+20, %a0), %d0-%d3          | load 4 values from *pB
+    movem.l (4*4, %a2), %d5-%d7/%a3         | load 4 values from *pM (d4 holds pA[4], so a3 is used)
+    add.l %d5, %d0                          | pB[4..7] += pM[4..7]
+    add.l %d6, %d1
+    add.l %d7, %d2
+    add.l %a3, %d3
+    movem.l %d0-%d3, (4*4+20, %a0)          | store the updated pB[4..7]
+
+    0:
+
+    mac.l %d0, %d4, (%a1)+, %d5, %acc0      | common MAC block: acc0 += pB[4] * pA[4], d5 = pA[5]
+    mac.l %d1, %d5, (%a1)+, %d6, %acc0      | acc0 += pB[5] * pA[5], d6 = pA[6]
+    mac.l %d2, %d6, (%a1), %d7, %acc0       | acc0 += pB[6] * pA[6], d7 = pA[7] (a1 stays at pA + 7)
+    mac.l %d3, %d7, %acc0                   | acc0 += pB[7] * pA[7]
+
+    move.l (8*4+8, %sp), %a3                | a3 = in
+    move.l (%a3), %d3                       | d3 = *in
+    move.l %d3, (4, %a0)                    | fs->error = *in
+    movclr.l %acc0, %d0                     | d0 = sum (this also clears acc0)
+    movem.l (8, %a0), %d1-%d2               | d1 = fs->round, d2 = fs->shift
+    add.l %d1, %d0                          | sum += fs->round
+    asr.l %d2, %d0                          | sum >>= fs->shift
+
+    add.l %d0, %d3
+    move.l %d3, (%a3)                       | *in += (sum >> fs->shift)
+
+    move.l %d3, ( 1*4, %a1)                 | pA[8] = updated *in
+    sub.l %d7, %d3
+    move.l %d3, ( 0*4, %a1)                 | pA[7] = pA[8] - old pA[7]
+    sub.l %d6, %d3
+    move.l %d3, (-1*4, %a1)                 | pA[6] = pA[7] - old pA[6]
+    sub.l %d5, %d3
+    move.l %d3, (-2*4, %a1)                 | pA[5] = pA[6] - old pA[5]
+
+    moveq #30,%d0                           | d4-d7 still hold the old pA[4..7]
+    asr.l %d0,%d7                           | arithmetic shift right by 30 on each of them
+    asr.l %d0,%d6
+    asr.l %d0,%d5
+    asr.l %d0,%d4
+
+    moveq #1,%d0
+    or.l %d0,%d7                            | set bit 0 in each shifted value
+    or.l %d0,%d6
+    or.l %d0,%d5
+    or.l %d0,%d4                            | d4 = (old pA[4] >> 30) | 1
+
+    lsl.l #2,%d7                            | d7 = ((old pA[7] >> 30) | 1) << 2
+    lsl.l #1,%d6                            | d6 = ((old pA[6] >> 30) | 1) << 1
+    lsl.l #1,%d5                            | d5 = ((old pA[5] >> 30) | 1) << 1
+    movem.l %d4-%d7, (8*4-3*4,%a2)          | store to *pM: pM[5..8] = d4..d7
+
+    move.l (%a0), %d0                       | reload fs->index (d4 was reused above)
+    addq.l #1, %d0
+    cmp.l #16, %d0                          | ++fs->index == 16 ?
+    bne 1f
+
+    movem.l (16*4+148, %a0), %d0-%d7        | read fs->dl[16..23] ...
+    movem.l %d0-%d7, (148, %a0)             | ... and copy them down to fs->dl[0..7]
+    movem.l (16*4+52, %a0), %d0-%d7         | read fs->dx[16..23] ...
+    movem.l %d0-%d7, (52, %a0)              | ... and copy them down to fs->dx[0..7]
+    clr.l %d0                               | fs->index = 0
+    1:
+
+    move.l %d0, (%a0)                       | write the new fs->index back
+
+    movem.l (%sp), %d2-%d7/%a2-%a3          | restore stacked regs
+    lea.l (8*4, %sp), %sp                   | release the register save area
+    rts
+
diff --git a/apps/codecs/libtta/ttadec.c b/apps/codecs/libtta/ttadec.c
index 2ff2d24da9..9d53a327f2 100644
--- a/apps/codecs/libtta/ttadec.c
+++ b/apps/codecs/libtta/ttadec.c
@@ -392,6 +392,10 @@ int player_init (tta_info *info) {
     unsigned int data_offset;
     unsigned int st_size;
 
+#ifdef CPU_COLDFIRE
+    coldfire_set_macsr(0); /* signed integer mode */
+#endif
+
     ttainfo = info;
     framelen = 0;
 
-- 
cgit v1.2.3