From e9edc8f82df2c182c2453720a79ad37c55e6ef4b Mon Sep 17 00:00:00 2001
From: Dave Chapman
Date: Sat, 19 Feb 2005 22:11:29 +0000
Subject: Thom Johansen's first EMAC optimisation for the Coldfire - about a 3%-4% speedup

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@6024 a1c6a512-1295-4272-9138-f99709370657
---
Review notes (placed after the "---" marker, so not part of the commit
message):
 - coldfire.c: the asm blocks now declare "memory" and "cc" clobbers,
   history is set up as data - order instead of &data[(-order)], and orders
   outside 1..8 are passed to FLAC__lpc_restore_signal() instead of
   returning without writing data[].
 - coldfire.h: the statement macros no longer carry their own trailing
   semicolon and the include guard no longer uses a reserved identifier.
 - stream_decoder.c: the simulator test uses !defined(SIMULATOR), matching
   the #ifndef SIMULATOR used in the two new files.
 - NOTE(review): the #include targets were missing from the reviewed copy
   and are filled in as <private/coldfire.h>, <FLAC/ordinals.h> and
   <config.h> - confirm against the tree.  The commit id above and the blob
   ids on the index lines are those of the original commit.

 apps/codecs/libFLAC/SOURCES                    |   3 +
 apps/codecs/libFLAC/coldfire.c                 | 198 +++++++++++++++++++++++++
 apps/codecs/libFLAC/include/private/coldfire.h |  65 ++++++++
 apps/codecs/libFLAC/stream_decoder.c           |   8 +
 4 files changed, 274 insertions(+)
 create mode 100644 apps/codecs/libFLAC/coldfire.c
 create mode 100644 apps/codecs/libFLAC/include/private/coldfire.h

(limited to 'apps')

diff --git a/apps/codecs/libFLAC/SOURCES b/apps/codecs/libFLAC/SOURCES
index fc793f5e10..7f5abc26fb 100644
--- a/apps/codecs/libFLAC/SOURCES
+++ b/apps/codecs/libFLAC/SOURCES
@@ -10,3 +10,6 @@ md5.c
 memory.c
 seekable_stream_decoder.c
 stream_decoder.c
+#if CONFIG_CPU==MCF5249
+coldfire.c
+#endif
diff --git a/apps/codecs/libFLAC/coldfire.c b/apps/codecs/libFLAC/coldfire.c
new file mode 100644
index 0000000000..c763c2001f
--- /dev/null
+++ b/apps/codecs/libFLAC/coldfire.c
@@ -0,0 +1,198 @@
+#ifndef SIMULATOR
+#include <private/coldfire.h>
+#include "private/lpc.h"
+
+/*
+ * Restore a signal from its LPC residual using the ColdFire EMAC unit.
+ *
+ * For each of the data_len output samples this computes
+ *     sum     = qlp_coeff[0]*data[-1] + ... + qlp_coeff[order-1]*data[-order]
+ *     data[0] = residual[0] + (sum >> lp_quantization)
+ * and then steps data and residual forward by one sample, so the `order`
+ * samples in front of data[0] must be valid on entry.
+ *
+ * Orders 2..8 use an unrolled multiply-accumulate sequence in acc0, order 1
+ * uses a plain multiply, and any other order is handed to the generic
+ * FLAC__lpc_restore_signal().
+ */
+void FLAC__lpc_restore_signal_order8_mac(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[])
+{
+    register const FLAC__int32 *qlp0;
+    register const FLAC__int32 *history;
+    register FLAC__int32 sum;
+
+    /* Only orders 1..8 are unrolled below; never leave data[] unwritten. */
+    if (order < 1 || order > 8) {
+        FLAC__lpc_restore_signal(residual, data_len, qlp_coeff, order, lp_quantization, data);
+        return;
+    }
+
+    /*
+     * qlp0 starts at the last coefficient and is read downwards, history
+     * starts at the oldest sample and is read upwards, so qlp_coeff[j] is
+     * always paired with data[-j-1].
+     */
+    qlp0 = qlp_coeff + (order - 1);
+    history = data - order;
+
+    /* MACSR = 0 sets none of the EMAC_* mode bits; start with acc0 = 0. */
+    SET_MACSR(0);
+    SET_ACC(0, acc0);
+
+    /*
+     * The asm blocks read samples that the C code stored on earlier
+     * iterations and they change the condition codes, hence the "memory"
+     * and "cc" clobbers.  movclr.l fetches acc0 and clears it again.
+     */
+    switch (order) {
+    case 8:
+        for ( ; data_len != 0; --data_len) {
+            asm volatile(
+                "mov.l (%1), %%d0\n\t"
+                "mov.l (%2), %%d1\n\t"
+                "mac.l %%d0, %%d1, 4(%2), %%d1, %%acc0\n\t"
+                "mov.l -4(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 8(%2), %%d1, %%acc0\n\t"
+                "mov.l -8(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 12(%2), %%d1, %%acc0\n\t"
+                "mov.l -12(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 16(%2), %%d1, %%acc0\n\t"
+                "mov.l -16(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 20(%2), %%d1, %%acc0\n\t"
+                "mov.l -20(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 24(%2), %%d1, %%acc0\n\t"
+                "mov.l -24(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 28(%2), %%d1, %%acc0\n\t"
+                "mov.l -28(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, %%acc0\n\t"
+                "movclr.l %%acc0, %0"
+                : "=ad" (sum) : "a" (qlp0), "a" (history) : "d0", "d1", "cc", "memory");
+            ++history;
+            *(data++) = *(residual++) + (sum >> lp_quantization);
+        }
+        return;
+    case 7:
+        for ( ; data_len != 0; --data_len) {
+            asm volatile(
+                "mov.l (%1), %%d0\n\t"
+                "mov.l (%2), %%d1\n\t"
+                "mac.l %%d0, %%d1, 4(%2), %%d1, %%acc0\n\t"
+                "mov.l -4(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 8(%2), %%d1, %%acc0\n\t"
+                "mov.l -8(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 12(%2), %%d1, %%acc0\n\t"
+                "mov.l -12(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 16(%2), %%d1, %%acc0\n\t"
+                "mov.l -16(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 20(%2), %%d1, %%acc0\n\t"
+                "mov.l -20(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 24(%2), %%d1, %%acc0\n\t"
+                "mov.l -24(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, %%acc0\n\t"
+                "movclr.l %%acc0, %0"
+                : "=ad" (sum) : "a" (qlp0), "a" (history) : "d0", "d1", "cc", "memory");
+            ++history;
+            *(data++) = *(residual++) + (sum >> lp_quantization);
+        }
+        return;
+    case 6:
+        for ( ; data_len != 0; --data_len) {
+            asm volatile(
+                "mov.l (%1), %%d0\n\t"
+                "mov.l (%2), %%d1\n\t"
+                "mac.l %%d0, %%d1, 4(%2), %%d1, %%acc0\n\t"
+                "mov.l -4(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 8(%2), %%d1, %%acc0\n\t"
+                "mov.l -8(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 12(%2), %%d1, %%acc0\n\t"
+                "mov.l -12(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 16(%2), %%d1, %%acc0\n\t"
+                "mov.l -16(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 20(%2), %%d1, %%acc0\n\t"
+                "mov.l -20(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, %%acc0\n\t"
+                "movclr.l %%acc0, %0"
+                : "=ad" (sum) : "a" (qlp0), "a" (history) : "d0", "d1", "cc", "memory");
+            ++history;
+            *(data++) = *(residual++) + (sum >> lp_quantization);
+        }
+        return;
+    case 5:
+        for ( ; data_len != 0; --data_len) {
+            asm volatile(
+                "mov.l (%1), %%d0\n\t"
+                "mov.l (%2), %%d1\n\t"
+                "mac.l %%d0, %%d1, 4(%2), %%d1, %%acc0\n\t"
+                "mov.l -4(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 8(%2), %%d1, %%acc0\n\t"
+                "mov.l -8(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 12(%2), %%d1, %%acc0\n\t"
+                "mov.l -12(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 16(%2), %%d1, %%acc0\n\t"
+                "mov.l -16(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, %%acc0\n\t"
+                "movclr.l %%acc0, %0"
+                : "=ad" (sum) : "a" (qlp0), "a" (history) : "d0", "d1", "cc", "memory");
+            ++history;
+            *(data++) = *(residual++) + (sum >> lp_quantization);
+        }
+        return;
+    case 4:
+        for ( ; data_len != 0; --data_len) {
+            asm volatile(
+                "mov.l (%1), %%d0\n\t"
+                "mov.l (%2), %%d1\n\t"
+                "mac.l %%d0, %%d1, 4(%2), %%d1, %%acc0\n\t"
+                "mov.l -4(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 8(%2), %%d1, %%acc0\n\t"
+                "mov.l -8(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 12(%2), %%d1, %%acc0\n\t"
+                "mov.l -12(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, %%acc0\n\t"
+                "movclr.l %%acc0, %0"
+                : "=ad" (sum) : "a" (qlp0), "a" (history) : "d0", "d1", "cc", "memory");
+            ++history;
+            *(data++) = *(residual++) + (sum >> lp_quantization);
+        }
+        return;
+    case 3:
+        for ( ; data_len != 0; --data_len) {
+            asm volatile(
+                "mov.l (%1), %%d0\n\t"
+                "mov.l (%2), %%d1\n\t"
+                "mac.l %%d0, %%d1, 4(%2), %%d1, %%acc0\n\t"
+                "mov.l -4(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, 8(%2), %%d1, %%acc0\n\t"
+                "mov.l -8(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, %%acc0\n\t"
+                "movclr.l %%acc0, %0"
+                : "=ad" (sum) : "a" (qlp0), "a" (history) : "d0", "d1", "cc", "memory");
+            ++history;
+            *(data++) = *(residual++) + (sum >> lp_quantization);
+        }
+        return;
+    case 2:
+        for ( ; data_len != 0; --data_len) {
+            asm volatile(
+                "mov.l (%1), %%d0\n\t"
+                "mov.l (%2), %%d1\n\t"
+                "mac.l %%d0, %%d1, 4(%2), %%d1, %%acc0\n\t"
+                "mov.l -4(%1), %%d0\n\t"
+                "mac.l %%d0, %%d1, %%acc0\n\t"
+                "movclr.l %%acc0, %0"
+                : "=ad" (sum) : "a" (qlp0), "a" (history) : "d0", "d1", "cc", "memory");
+            ++history;
+            *(data++) = *(residual++) + (sum >> lp_quantization);
+        }
+        return;
+    case 1:
+        /* won't gain anything by using mac here. */
+        for ( ; data_len != 0; --data_len) {
+            sum = (qlp0[0] * (*(history++)));
+            *(data++) = *(residual++) + (sum >> lp_quantization);
+        }
+        return;
+    }
+}
+
+#endif /* !SIMULATOR */
diff --git a/apps/codecs/libFLAC/include/private/coldfire.h b/apps/codecs/libFLAC/include/private/coldfire.h
new file mode 100644
index 0000000000..22f1711f2c
--- /dev/null
+++ b/apps/codecs/libFLAC/include/private/coldfire.h
@@ -0,0 +1,65 @@
+#ifndef SIMULATOR
+#ifndef FLAC__PRIVATE__COLDFIRE_H
+#define FLAC__PRIVATE__COLDFIRE_H
+
+#include <FLAC/ordinals.h>
+
+/*
+ * Thin wrappers around single ColdFire EMAC instructions.
+ *
+ * `acc` (likewise acca/accb) is pasted verbatim into the instruction, so
+ * pass a bare accumulator name such as acc0.  None of the macros ends in a
+ * semicolon; use them like ordinary statements, e.g.
+ *     SET_ACC(0, acc0);
+ */
+
+/* acc += x * y */
+#define MACL(x, y, acc) \
+    asm volatile ("mac.l %0, %1, %%" #acc \
+                  : : "ad" ((x)), "ad" ((y)))
+
+/* Same as MACL, with `shift` passed as an extra immediate operand. */
+#define MACL_SHIFT(x, y, shift, acc) \
+    asm volatile ("mac.l %0, %1, #" #shift ", %%" #acc \
+                  : : "ad" ((x)), "ad" ((y)))
+
+/* acc -= x * y */
+#define MSACL(x, y, acc) \
+    asm volatile ("msac.l %0, %1, %%" #acc \
+                  : : "ad" ((x)), "ad" ((y)))
+
+/* Same as MSACL, with `shift` passed as an extra immediate operand. */
+#define MSACL_SHIFT(x, y, shift, acc) \
+    asm volatile ("msac.l %0, %1, #" #shift ", %%" #acc \
+                  : : "ad" ((x)), "ad" ((y)))
+
+/* Write x to the MAC status register; see the EMAC_* bits below. */
+#define SET_MACSR(x) \
+    asm volatile ("mov.l %0, %%macsr" : : "adi" ((x)))
+
+/* accb = acca */
+#define TRANSFER_ACC(acca, accb) \
+    asm volatile ("mov.l %" #acca ", %" #accb)
+
+/* acc = x */
+#define SET_ACC(x, acc) \
+    asm volatile ("mov.l %0, %%" #acc : : "adi" ((x)))
+
+/* x = acc */
+#define GET_ACC(x, acc) \
+    asm volatile ("mov.l %%" #acc ", %0\n\t" : "=ad" ((x)))
+
+/* x = acc, then clear acc */
+#define GET_ACC_CLR(x, acc) \
+    asm volatile ("movclr.l %%" #acc ", %0\n\t" : "=ad" ((x)))
+
+/* Mode bits for SET_MACSR() */
+#define EMAC_SATURATE   0x00000080
+#define EMAC_FRACTIONAL 0x00000020
+#define EMAC_ROUND      0x00000010
+
+/* EMAC version of FLAC__lpc_restore_signal(); defined in coldfire.c */
+void FLAC__lpc_restore_signal_order8_mac(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
+
+#endif /* FLAC__PRIVATE__COLDFIRE_H */
+#endif /* !SIMULATOR */
diff --git a/apps/codecs/libFLAC/stream_decoder.c b/apps/codecs/libFLAC/stream_decoder.c
index ec43314fe9..eb78d18be0 100644
--- a/apps/codecs/libFLAC/stream_decoder.c
+++ b/apps/codecs/libFLAC/stream_decoder.c
@@ -43,6 +43,10 @@
 #include "private/lpc.h"
 #include "private/memory.h"
 
+#if CONFIG_CPU==MCF5249
+#include <private/coldfire.h>
+#endif
+
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -298,7 +302,11 @@ FLAC_API FLAC__StreamDecoderState FLAC__stream_decoder_init(FLAC__StreamDecoder
 	decoder->private_->local_lpc_restore_signal = FLAC__lpc_restore_signal;
 	decoder->private_->local_lpc_restore_signal_64bit = FLAC__lpc_restore_signal_wide;
 	decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal;
+#if CONFIG_CPU==MCF5249 && !defined(SIMULATOR)
+	decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal_order8_mac;
+#else
 	decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal;
+#endif
 	/* now override with asm where appropriate */
 #ifndef FLAC__NO_ASM
 	if(decoder->private_->cpuinfo.use_asm) {
-- 
cgit v1.2.3