From 9985caf3f96df691fad9332986b7af4d0f66676d Mon Sep 17 00:00:00 2001 From: Thom Johansen Date: Tue, 31 May 2005 07:56:28 +0000 Subject: ASM optimisation by David Bryant. Placed various important arrays in IRAM. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@6540 a1c6a512-1295-4272-9138-f99709370657 --- apps/codecs/libwavpack/Makefile | 2 +- apps/codecs/libwavpack/SOURCES | 3 + apps/codecs/libwavpack/coldfire.S | 535 ++++++++++++++++++++++++++++++++++++++ apps/codecs/libwavpack/unpack.c | 45 ++-- apps/codecs/libwavpack/wputils.c | 2 +- apps/plugins/wv2wav.c | 2 +- 6 files changed, 566 insertions(+), 23 deletions(-) create mode 100644 apps/codecs/libwavpack/coldfire.S diff --git a/apps/codecs/libwavpack/Makefile b/apps/codecs/libwavpack/Makefile index df26559f59..75b9060534 100644 --- a/apps/codecs/libwavpack/Makefile +++ b/apps/codecs/libwavpack/Makefile @@ -15,7 +15,7 @@ INCLUDES += -I$(APPSDIR)/$(APPEXTRA) endif CFLAGS = $(GCCOPTS) \ -$(INCLUDES) $(TARGET) $(EXTRA_DEFINES) -DMEM=${MEMORYSIZE} +$(INCLUDES) $(TARGET) $(EXTRA_DEFINES) -DMEM=${MEMORYSIZE} -O2 \ # This sets up 'SRC' based on the files mentioned in SOURCES include $(TOOLSDIR)/makesrc.inc diff --git a/apps/codecs/libwavpack/SOURCES b/apps/codecs/libwavpack/SOURCES index def57b703c..a4f0f2f7a9 100644 --- a/apps/codecs/libwavpack/SOURCES +++ b/apps/codecs/libwavpack/SOURCES @@ -4,4 +4,7 @@ metadata.c unpack.c words.c wputils.c +#if CONFIG_CPU==MCF5249 && !defined(SIMULATOR) +coldfire.S +#endif diff --git a/apps/codecs/libwavpack/coldfire.S b/apps/codecs/libwavpack/coldfire.S new file mode 100644 index 0000000000..9c7e098e88 --- /dev/null +++ b/apps/codecs/libwavpack/coldfire.S @@ -0,0 +1,535 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2005 by David Bryant + * + * All files in this archive are subject to the GNU General Public License. + * See the file COPYING in the source tree root for full license agreement. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ****************************************************************************/ + +/* This is an assembly optimized version of the following WavPack function: + * + * void decorr_stereo_pass_cont_mcf5249 (struct decorr_pass *dpp, + * long *buffer, long sample_count); + * + * It performs a single pass of stereo decorrelation on the provided buffer. + * Note that this version of the function requires that the 8 previous stereo + * samples are visible and correct. In other words, it ignores the "samples_*" + * fields in the decorr_pass structure and gets the history data directly + * from the buffer. It does, however, return the appropriate history samples + * to the decorr_pass structure before returning. + * + * This is written to work on a MCF5249 processor, or any processor based on + * the ColdFire V2 core with an EMAC unit. The EMAC is perfectly suited for + * the "apply_weight" function of WavPack decorrelation because it provides + * the requires 40-bit product. The fractional rounding mode of the EMAC is not + * configurable and uses "round to even" while WavPack uses "round to larger", + * so the rounding has to be done manually. + */ + + .text + .align 2 + .global decorr_stereo_pass_cont_mcf5249 + +decorr_stereo_pass_cont_mcf5249: + + lea (-44, %sp), %sp + movem.l %d2-%d7/%a2-%a6, (%sp) + move.l 44+4(%sp), %a2 | a2 = dpp-> + move.l 44+8(%sp), %a1 | a1 = bptr + move.w 2(%a2), %a3 | a3 = dpp->delta + move.w 4(%a2), %d3 | d3 = dpp->weight_A (sign extended) + ext.l %d3 + move.w 6(%a2), %d4 | d4 = dpp->weight_B (sign extended) + ext.l %d4 + move.l 44+12(%sp), %d0 | d0 = sample_count + jbeq return_only | if zero, nothing to do + + lsl.l #3, %d0 | d5 = bptr + (sample_count * 8) + move.l %d0, %d5 + add.l %a1, %d5 + + moveq.l #17, %d0 | left shift weights & delta 17 places + asl.l %d0, %d3 + asl.l %d0, %d4 + move.l %a3, %d1 + asl.l %d0, %d1 + move.l %d1, %a3 + + move.l #0x20, %macsr | set fractional mode for MAC + move.l #0, %acc1 | acc1 = 0x00 0000 80 (for rounding) + move.l #0x800000, %accext01 + + move.l #1024<<17, %d6 | d6 & d7 are weight clipping limits + move.l #-1024<<17, %d7 | (only used by negative terms) + + move.w (%a2), %d0 | d0 = term + ext.l %d0 + cmp.l #17, %d0 + jbeq term_17 | term = 17 + cmp.l #18, %d0 + jbeq term_18 | term = 18 + addq.l #1, %d0 + jbeq term_minus_1 | term = -1 + addq.l #1, %d0 + jbeq term_minus_2 | term = -2 + addq.l #1, %d0 + jbeq term_minus_3 | term = -3 + jbra term_default | default term = 1 - 8 + +|------------------------------------------------------------------------------ +| Loop to handle term = 17 condition +| +| a0 = d0 = (2 * bptr [-1]) - bptr [-2] +| a1 = bptr d1 = initial bptr [0] +| a2 = dpp-> d2 = updated bptr [0] +| a3 = dpp->delta << 17 d3 = dpp->weight_A << 17 +| a4 = d4 = dpp->weight_B << 17 +| a5 = d5 = eptr +| macsr = 0x20 acc1 = 0x00 0000 80 +|------------------------------------------------------------------------------ + +term_17: + move.l -8(%a1), %d0 | d0 = 2 * bptr [-1] - bptr [-2] + add.l %d0, %d0 + sub.l -16(%a1), %d0 + beq .L251 | if zero, skip calculation + move.l %acc1, %acc0 + asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_A + mac.l %d0, %d3, %acc0 + move.l (%a1), %d1 + beq .L255 + eor.l %d1, %d0 | else compare signs + bge .L256 | if same, add delta to weight + sub.l %a3, %d3 | else subtract delta from weight + sub.l %a3, %d3 | subtract again instead of branch +.L256: add.l %a3, %d3 | add delta to weight + +.L255: move.l %acc0, %d2 | d2 = rounded product + add.l %d1, %d2 | update bptr [0] and store + move.l %d2, (%a1)+ + +.L253: move.l -8(%a1), %d0 | d0 = 2 * bptr [-1] - bptr [-2] + add.l %d0, %d0 + sub.l -16(%a1), %d0 + beq .L257 | if zero, skip calculations + move.l %acc1, %acc0 + asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_B + mac.l %d0, %d4, %acc0 + move.l (%a1), %d1 + beq .L254 + eor.l %d1, %d0 | else compare signs + bge .L259 | if same, add delta to weight + sub.l %a3, %d4 | else subtract delta from weight + sub.l %a3, %d4 | subtract again instead of branch +.L259: add.l %a3, %d4 | add delta to weight + +.L254: move.l %acc0, %d2 | d2 = rounded product + add.l %d1, %d2 | update bptr [0] and store + move.l %d2, (%a1)+ + +.L252: cmp.l %a1, %d5 | loop if bptr < eptr + jbhi term_17 + bra term_17_18_finish | exit through common path + +.L251: addq.l #4, %a1 | update point and jump back into loop + bra .L253 + +.L257: addq.l #4, %a1 | update point and jump back into loop + bra .L252 + +|------------------------------------------------------------------------------ +| Loop to handle term = 18 condition +| +| a0 = d0 = ((3 * bptr [-1]) - bptr [-2]) >> 1 +| a1 = bptr d1 = initial bptr [0] +| a2 = dpp-> d2 = updated bptr [0] +| a3 = dpp->delta << 17 d3 = dpp->weight_A << 17 +| a4 = d4 = dpp->weight_B << 17 +| a5 = d5 = eptr +| macsr = 0x20 acc1 = 0x00 0000 80 +|------------------------------------------------------------------------------ + +term_18: + move.l -8(%a1), %a0 | d0 = (3 * bptr [-1] - bptr [-2]) >> 1 + lea (%a0,%a0.l*2), %a0 + move.l %a0, %d0 + sub.l -16(%a1), %d0 + asr.l #1, %d0 + beq .L260 + move.l %acc1, %acc0 + asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_A + mac.l %d0, %d3, %acc0 + move.l (%a1), %d1 + beq .L266 + eor.l %d1, %d0 | else compare signs + bge .L267 | if same, add delta to weight + sub.l %a3, %d3 | else subtract delta from weight + sub.l %a3, %d3 | subtract again instead of branch +.L267: add.l %a3, %d3 | add delta to weight + +.L266: move.l %acc0, %d2 | d2 = rounded product + add.l %d1, %d2 | add applied weight to bptr [0], store + move.l %d2, (%a1)+ + +.L268: move.l -8(%a1), %a0 | d0 = (3 * bptr [-1] - bptr [-2]) >> 1 + lea (%a0,%a0.l*2), %a0 + move.l %a0, %d0 + sub.l -16(%a1), %d0 + asr.l #1, %d0 + beq .L261 + move.l %acc1, %acc0 + asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_B + mac.l %d0, %d4, %acc0 + move.l (%a1), %d1 + beq .L265 + eor.l %d1, %d0 | else compare signs + bge .L270 | if same, add delta to weight + sub.l %a3, %d4 | else subtract delta from weight + sub.l %a3, %d4 | subtract again instead of branch +.L270: add.l %a3, %d4 | add delta to weight + +.L265: move.l %acc0, %d2 | d2 = rounded product + add.l %d1, %d2 | add applied weight to bptr [0], store + move.l %d2, (%a1)+ + +.L269: cmp.l %a1, %d5 | loop if bptr < eptr + jbhi term_18 + bra term_17_18_finish | exit through common path + +.L260: addq.l #4, %a1 | bump pointer and jump back into loop + bra .L268 + +.L261: addq.l #4, %a1 | bump pointer and jump back into loop + bra .L269 + +term_17_18_finish: + move.l -4(%a1), 40(%a2) | restore dpp->samples_A [0-1], B [0-1] + move.l -8(%a1), 8(%a2) + move.l -12(%a1), 44(%a2) + move.l -16(%a1), 12(%a2) + jbra finish_up + +|------------------------------------------------------------------------------ +| Loop to handle default terms (i.e. 1 - 8) +| +| a0 = tptr d0 = tptr [0] +| a1 = bptr d1 = initial bptr [0] +| a2 = dpp-> d2 = updated bptr [0] +| a3 = dpp->delta << 17 d3 = dpp->weight_A << 17 +| a4 = d4 = dpp->weight_B << 17 +| a5 = d5 = eptr +| macsr = 0x20 acc1 = 0x00 0000 80 +|------------------------------------------------------------------------------ + +term_default: + move.w (%a2), %d0 | a0 = a1 - (dpp->term * 8) + ext.l %d0 + lsl.l #3, %d0 + move.l %a1, %a0 + sub.l %d0, %a0 + +term_default_loop: + move.l (%a0)+, %d0 | d0 = tptr [0], skip ahead if zero + beq .L271 + move.l %acc1, %acc0 + asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_A + mac.l %d0, %d3, %acc0 + move.l (%a1), %d1 + beq .L277 + eor.l %d1, %d0 | else compare signs + bge .L278 | if same, add delta to weight + sub.l %a3, %d3 | else subtract delta from weight + sub.l %a3, %d3 | subtract again instead of branch +.L278: add.l %a3, %d3 | add delta to weight + +.L277: move.l %acc0, %d2 | d2 = rounded product + add.l %d1, %d2 | add applied weight to bptr [0], store + move.l %d2, (%a1)+ + +.L275: move.l (%a0)+, %d0 | d0 = tptr [0], skip ahead if zero + beq .L272 + move.l %acc1, %acc0 + asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_B + mac.l %d0, %d4, %acc0 + move.l (%a1), %d1 + beq .L276 + eor.l %d1, %d0 | else compare signs + bge .L281 | if same, add delta to weight + sub.l %a3, %d4 | else subtract delta from weight + sub.l %a3, %d4 | subtract again instead of branch +.L281: add.l %a3, %d4 | add delta to weight + +.L276: move.l %acc0, %d2 | d2 = rounded product + add.l %d1, %d2 | add applied weight to bptr [0], store + move.l %d2, (%a1)+ + +.L274: cmp.l %a1, %d5 | loop back if bptr < eptr + jbhi term_default_loop + move.w (%a2), %d0 | d0 = term - 1 + moveq.l #8, %d1 | d1 = loop counter + +.L323: subq.l #1, %d0 | back up & mask index + and.l #7, %d0 + move.l -(%a1), 40(%a2,%d0.l*4) | store dpp->samples_B [d0] + move.l -(%a1), 8(%a2,%d0.l*4) | store dpp->samples_A [d0] + subq.l #1, %d1 | loop on count + jbne .L323 + jbra finish_up + +.L271: addq.l #4, %a1 | bump pointer and jump back into loop + bra .L275 + +.L272: addq.l #4, %a1 | bump pointer and jump back into loop + bra .L274 + + +|------------------------------------------------------------------------------ +| Loop to handle term = -1 condition +| +| a0 = d0 = decorrelation sample +| a1 = bptr d1 = initial bptr [0] +| a2 = dpp-> d2 = updated bptr [0] +| a3 = dpp->delta << 17 d3 = dpp->weight_A << 17 +| a4 = d4 = dpp->weight_B << 17 +| a5 = d5 = eptr +| a6 = d6 = 1024 << 17 +| a7 = d7 = -1024 << 17 +| macsr = 0x20 acc1 = 0x00 0000 80 +|------------------------------------------------------------------------------ + +term_minus_1: + move.l -4(%a1), %d0 | d0 = bptr [-1] + beq .L402 + move.l %acc1, %acc0 + asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_A) + mac.l %d0, %d3, %acc0 + move.l (%a1), %d1 + beq .L405 + eor.l %d1, %d0 | else compare signs + bge .L404 | if same, add delta to weight + sub.l %a3, %d3 | else subtract delta from weight + cmp.l %d7, %d3 | check for negative clip limit + bge .L405 + move.l %d7, %d3 + bra .L405 + +.L404: add.l %a3, %d3 | add delta to weight + cmp.l %d6, %d3 | check for positive clip limit + ble .L405 + move.l %d6, %d3 + +.L405: move.l %acc0, %d0 | d2 = rounded product + add.l %d1, %d0 | add applied weight to bptr [0], store + move.l %d0, (%a1)+ + beq .L401 + +.L410: move.l %acc1, %acc0 + asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_B) + mac.l %d0, %d4, %acc0 + move.l (%a1), %d1 + beq .L403 + eor.l %d1, %d0 | else compare signs + bge .L407 | if same, add delta to weight + sub.l %a3, %d4 | else subtract delta from weight + cmp.l %d7, %d4 | check for negative clip limit + bge .L403 + move.l %d7, %d4 + bra .L403 + +.L407: add.l %a3, %d4 | add delta to weight + cmp.l %d6, %d4 | check for positive clip limit + ble .L403 + move.l %d6, %d4 + +.L403: move.l %acc0, %d2 | d2 = rounded product + add.l %d1, %d2 | add applied weight to bptr [1], store + move.l %d2, (%a1)+ + +.L411: cmp.l %a1, %d5 | loop back if bptr < eptr + jbhi term_minus_1 + move.l -4(%a1), 8(%a2) | dpp->samples_A [0] = bptr [-1] + jbra finish_up + +.L402: move.l (%a1)+, %d0 + bne .L410 + +.L401: addq.l #4, %a1 + bra .L411 + + +|------------------------------------------------------------------------------ +| Loop to handle term = -2 condition +| +| a0 = d0 = decorrelation sample +| a1 = bptr d1 = initial bptr [0] +| a2 = dpp-> d2 = updated bptr [0] +| a3 = dpp->delta << 17 d3 = dpp->weight_A << 17 +| a4 = d4 = dpp->weight_B << 17 +| a5 = d5 = eptr +| a6 = d6 = 1024 << 17 +| a7 = d7 = -1024 << 17 +| macsr = 0x20 acc1 = 0x00 0000 80 +|------------------------------------------------------------------------------ + +term_minus_2: + move.l -8(%a1), %d0 | d0 = bptr [-2] + beq .L511 + move.l %acc1, %acc0 + asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_B) + mac.l %d0, %d4, %acc0 + move.l 4(%a1), %d1 + beq .L505 + eor.l %d1, %d0 | else compare signs + bge .L504 | if same, add delta to weight + sub.l %a3, %d4 | else subtract delta from weight + cmp.l %d7, %d4 | ckeck for negative clip limit + bge .L505 + move.l %d7, %d4 + bra .L505 + +.L504: add.l %a3, %d4 | add delta to weight + cmp.l %d6, %d4 | check for positive clip limit + ble .L505 + move.l %d6, %d4 + +.L505: move.l %acc0, %d0 | d2 = rounded product + add.l %d1, %d0 | add applied weight to bptr [0], store + move.l %d0, 4(%a1) + beq .L512 + +.L510: move.l %acc1, %acc0 + asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_A) + mac.l %d0, %d3, %acc0 + move.l (%a1), %d1 + beq .L503 + eor.l %d1, %d0 | else compare signs + bge .L507 | if same, add delta to weight + sub.l %a3, %d3 | else subtract delta from weight + cmp.l %d7, %d3 | check for negative clip limit + bge .L503 + move.l %d7, %d3 + bra .L503 + +.L507: add.l %a3, %d3 | add delta to weight + cmp.l %d6, %d3 | check for negative clip limit + ble .L503 + move.l %d6, %d3 + +.L503: move.l %acc0, %d2 | d2 = rounded product + add.l %d1, %d2 | add applied weight to bptr [1], store + move.l %d2, (%a1) + +.L512: addq.l #8, %a1 + cmp.l %a1, %d5 | loop if bptr < eptr + jbhi term_minus_2 + move.l -8(%a1), 40(%a2) | dpp->samples_B [0] = bptr [-4] + jbra finish_up + +.L511: move.l 4(%a1), %d0 + beq .L512 + bra .L510 + + +|------------------------------------------------------------------------------ +| Loop to handle term = -3 condition +| +| a0 = d0 = decorrelation sample +| a1 = bptr d1 = initial bptr [0] +| a2 = dpp-> d2 = updated bptr [0] +| a3 = dpp->delta << 17 d3 = dpp->weight_A << 17 +| a4 = d4 = dpp->weight_B << 17 +| a5 = d5 = eptr +| a6 = d6 = 1024 << 17 +| a7 = d7 = -1024 << 17 +| macsr = 0x20 acc1 = 0x00 0000 80 +|------------------------------------------------------------------------------ + +term_minus_3: + move.l -4(%a1), %d0 | d0 = bptr [-1] + beq .L301 + move.l %acc1, %acc0 + asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_A) + mac.l %d0, %d3, %acc0 + move.l (%a1), %d1 + beq .L320 + eor.l %d1, %d0 | else compare signs + bge .L319 | if same, add delta to weight + sub.l %a3, %d3 | else subtract delta from weight + cmp.l %d7, %d3 | check for negative clip limit + bge .L320 + move.l %d7, %d3 + bra .L320 + +.L319: add.l %a3, %d3 | add delta to weight + cmp.l %d6, %d3 | check for positive clip limit + ble .L320 + move.l %d6, %d3 + +.L320: move.l %acc0, %d2 | d2 = rounded product + add.l %d1, %d2 | add applied weight to bptr [0], store + move.l %d2, (%a1)+ + +.L330: move.l -12(%a1), %d0 | d0 = bptr [-2] + beq .L302 + move.l %acc1, %acc0 + asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_B) + mac.l %d0, %d4, %acc0 + move.l (%a1), %d1 + beq .L318 + eor.l %d1, %d0 | else compare signs + bge .L322 | if same, add delta to weight + sub.l %a3, %d4 | else subtract delta from weight + cmp.l %d7, %d4 | check for negative clip limit + bge .L318 + move.l %d7, %d4 + bra .L318 + +.L322: add.l %a3, %d4 | add delta to weight + cmp.l %d6, %d4 | check for positive clip limit + ble .L318 + move.l %d6, %d4 + +.L318: move.l %acc0, %d2 | d2 = rounded product + add.l %d1, %d2 | add applied weight to bptr [1], store + move.l %d2, (%a1)+ + +.L331: cmp.l %a1, %d5 | bptr, eptr + jbhi term_minus_3 + move.l -4(%a1), 8(%a2) | dpp->samples_A [0] = bptr [-1] + move.l -8(%a1), 40(%a2) | dpp->samples_B [0] = bptr [-2] + jbra finish_up + +.L301: addq.l #4, %a1 + bra .L330 + +.L302: addq.l #4, %a1 + bra .L331 + +| finish and return + +finish_up: + moveq.l #17, %d0 + asr.l %d0, %d3 + asr.l %d0, %d4 + move.w %d3, 4(%a2) | weight_A, dpp->weight_A + move.w %d4, 6(%a2) | weight_B, dpp->weight_B + + clr.l %d0 | clear up EMAC + move.l %d0, %acc0 + move.l %d0, %acc1 + +return_only: + movem.l (%sp), %d2-%d7/%a2-%a6 + lea (44,%sp), %sp + rts diff --git a/apps/codecs/libwavpack/unpack.c b/apps/codecs/libwavpack/unpack.c index ae473787a7..5afaac3659 100644 --- a/apps/codecs/libwavpack/unpack.c +++ b/apps/codecs/libwavpack/unpack.c @@ -27,7 +27,11 @@ static void strcpy_loc (char *dst, char *src) { while (*src) *dst++ = *src++; *d // these macros implement the weight application and update operations // that are at the heart of the decorrelation loops +#if 0 // PERFCOND #define apply_weight_i(weight, sample) ((weight * sample + 512) >> 10) +#else +#define apply_weight_i(weight, sample) ((((weight * sample) >> 8) + 2) >> 2) +#endif #define apply_weight_f(weight, sample) (((((sample & 0xffff) * weight) >> 9) + \ (((sample & ~0xffff) >> 9) * weight) + 1) >> 1) @@ -39,7 +43,7 @@ static void strcpy_loc (char *dst, char *src) { while (*src) *dst++ = *src++; *d #define apply_weight(weight, sample) ((int32_t)((weight * (int64_t) sample + 512) >> 10)) #endif -#if 1 // PERFCOND +#if 0 // PERFCOND #define update_weight(weight, delta, source, result) \ if (source && result) weight -= ((((source ^ result) >> 30) & 2) - 1) * delta; #else @@ -315,9 +319,14 @@ int read_config_info (WavpackContext *wpc, WavpackMetadata *wpmd) // samples unpacked, which can be less than the number requested if an error // occurs or the end of the block is reached. +#if CONFIG_CPU==MCF5249 && !defined(SIMULATOR) +extern void decorr_stereo_pass_cont_mcf5249 (struct decorr_pass *dpp, long *buffer, long sample_count); +#else +static void decorr_stereo_pass_cont (struct decorr_pass *dpp, long *buffer, long sample_count); +#endif + static void decorr_mono_pass (struct decorr_pass *dpp, long *buffer, long sample_count); static void decorr_stereo_pass (struct decorr_pass *dpp, long *buffer, long sample_count); -static void decorr_stereo_pass_cont (struct decorr_pass *dpp, long *buffer, long sample_count); static void fixup_samples (WavpackStream *wps, long *buffer, ulong sample_count); long unpack_samples (WavpackContext *wpc, long *buffer, ulong sample_count) @@ -372,7 +381,11 @@ long unpack_samples (WavpackContext *wpc, long *buffer, ulong sample_count) else for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) { decorr_stereo_pass (dpp, buffer, 8); +#if CONFIG_CPU==MCF5249 && !defined(SIMULATOR) + decorr_stereo_pass_cont_mcf5249 (dpp, buffer + 16, sample_count - 8); +#else decorr_stereo_pass_cont (dpp, buffer + 16, sample_count - 8); +#endif } if (flags & JOINT_STEREO) @@ -530,11 +543,13 @@ static void decorr_stereo_pass (struct decorr_pass *dpp, long *buffer, long samp dpp->weight_B = weight_B; } +#if CONFIG_CPU != MCF5249 || defined(SIMULATOR) + static void decorr_stereo_pass_cont (struct decorr_pass *dpp, long *buffer, long sample_count) { long delta = dpp->delta, weight_A = dpp->weight_A, weight_B = dpp->weight_B; long *bptr, *tptr, *eptr = buffer + (sample_count * 2), sam_A, sam_B; - int k; + int k, i; switch (dpp->term) { @@ -581,23 +596,11 @@ static void decorr_stereo_pass_cont (struct decorr_pass *dpp, long *buffer, long update_weight (weight_B, delta, tptr [1], sam_A); } - k = dpp->term; - dpp->samples_B [--k & (MAX_TERM - 1)] = bptr [-1]; - dpp->samples_A [ k & (MAX_TERM - 1)] = bptr [-2]; - dpp->samples_B [--k & (MAX_TERM - 1)] = bptr [-3]; - dpp->samples_A [ k & (MAX_TERM - 1)] = bptr [-4]; - dpp->samples_B [--k & (MAX_TERM - 1)] = bptr [-5]; - dpp->samples_A [ k & (MAX_TERM - 1)] = bptr [-6]; - dpp->samples_B [--k & (MAX_TERM - 1)] = bptr [-7]; - dpp->samples_A [ k & (MAX_TERM - 1)] = bptr [-8]; - dpp->samples_B [--k & (MAX_TERM - 1)] = bptr [-9]; - dpp->samples_A [ k & (MAX_TERM - 1)] = bptr [-10]; - dpp->samples_B [--k & (MAX_TERM - 1)] = bptr [-11]; - dpp->samples_A [ k & (MAX_TERM - 1)] = bptr [-12]; - dpp->samples_B [--k & (MAX_TERM - 1)] = bptr [-13]; - dpp->samples_A [ k & (MAX_TERM - 1)] = bptr [-14]; - dpp->samples_B [--k & (MAX_TERM - 1)] = bptr [-15]; - dpp->samples_A [ k & (MAX_TERM - 1)] = bptr [-16]; + for (k = dpp->term - 1, i = 8; i--; k--) { + dpp->samples_B [k & (MAX_TERM - 1)] = *--bptr; + dpp->samples_A [k & (MAX_TERM - 1)] = *--bptr; + } + break; case -1: @@ -639,6 +642,8 @@ static void decorr_stereo_pass_cont (struct decorr_pass *dpp, long *buffer, long dpp->weight_B = weight_B; } +#endif + static void decorr_mono_pass (struct decorr_pass *dpp, long *buffer, long sample_count) { long delta = dpp->delta, weight_A = dpp->weight_A; diff --git a/apps/codecs/libwavpack/wputils.c b/apps/codecs/libwavpack/wputils.c index 9227b66e46..8d58b3b4d7 100644 --- a/apps/codecs/libwavpack/wputils.c +++ b/apps/codecs/libwavpack/wputils.c @@ -45,7 +45,7 @@ static ulong read_next_header (read_stream infile, WavpackHeader *wphdr); // large integer or floating point files (but always provides at least 24 bits // of resolution). -static WavpackContext wpc; +static WavpackContext wpc IDATA_ATTR; WavpackContext *WavpackOpenFileInput (read_stream infile, char *error) { diff --git a/apps/plugins/wv2wav.c b/apps/plugins/wv2wav.c index c0bc05cf12..909a0c3c63 100644 --- a/apps/plugins/wv2wav.c +++ b/apps/plugins/wv2wav.c @@ -29,7 +29,7 @@ static struct plugin_api* rb; static file_info_struct file_info; -static long temp_buffer [BUFFER_SIZE]; +static long temp_buffer [BUFFER_SIZE] IDATA_ATTR; /* Reformat samples from longs in processor's native endian mode to little-endian data with 2 bytes / sample. */ -- cgit v1.2.3