diff options
author | Nils Wallménius <nils@rockbox.org> | 2010-07-12 16:14:32 +0000 |
---|---|---|
committer | Nils Wallménius <nils@rockbox.org> | 2010-07-12 16:14:32 +0000 |
commit | a4cad3d92684187d37c4034cbe185184719baaca (patch) | |
tree | 9802132da7732d79e9cab47a11f45ce972156f0c /apps/codecs | |
parent | d3a194593958c45b2173e7d3c919af4548c9f55f (diff) | |
download | rockbox-a4cad3d92684187d37c4034cbe185184719baaca.tar.gz rockbox-a4cad3d92684187d37c4034cbe185184719baaca.zip |
Coldfire assembler implementation of hybrid_filter for libtta. Speeds up decoding on h300 by 4.2MHz. Set svn properties.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27404 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs')
-rw-r--r-- | apps/codecs/libtta/SOURCES | 3 | ||||
-rw-r--r-- | apps/codecs/libtta/filter.h | 2 | ||||
-rw-r--r-- | apps/codecs/libtta/filter_coldfire.S | 164 | ||||
-rw-r--r-- | apps/codecs/libtta/ttadec.c | 4 |
4 files changed, 172 insertions, 1 deletions
diff --git a/apps/codecs/libtta/SOURCES b/apps/codecs/libtta/SOURCES index 35f2660dd7..0a8f1171eb 100644 --- a/apps/codecs/libtta/SOURCES +++ b/apps/codecs/libtta/SOURCES | |||
@@ -2,3 +2,6 @@ ttadec.c | |||
2 | #ifdef CPU_ARM | 2 | #ifdef CPU_ARM |
3 | filter_arm.S | 3 | filter_arm.S |
4 | #endif | 4 | #endif |
5 | #ifdef CPU_COLDFIRE | ||
6 | filter_coldfire.S | ||
7 | #endif | ||
diff --git a/apps/codecs/libtta/filter.h b/apps/codecs/libtta/filter.h index 6eef6dcf42..228757b9a0 100644 --- a/apps/codecs/libtta/filter.h +++ b/apps/codecs/libtta/filter.h | |||
@@ -42,7 +42,7 @@ | |||
42 | ///////// Filter Settings ////////// | 42 | ///////// Filter Settings ////////// |
43 | static int flt_set[3] = {10, 9, 10}; | 43 | static int flt_set[3] = {10, 9, 10}; |
44 | 44 | ||
45 | #ifdef CPU_ARM | 45 | #if defined(CPU_ARM) || defined(CPU_COLDFIRE) |
46 | int hybrid_filter(fltst *fs, int *in); /* implements in filter_arm.S */ | 46 | int hybrid_filter(fltst *fs, int *in); /* implements in filter_arm.S */ |
47 | 47 | ||
48 | #else | 48 | #else |
diff --git a/apps/codecs/libtta/filter_coldfire.S b/apps/codecs/libtta/filter_coldfire.S new file mode 100644 index 0000000000..3950eb52e6 --- /dev/null +++ b/apps/codecs/libtta/filter_coldfire.S | |||
@@ -0,0 +1,164 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * $Id$ | ||
9 | * | ||
10 | * Copyright (C) 2010 Nils Wallménius | ||
11 | * | ||
12 | * This program is free software; you can redistribute it and/or | ||
13 | * modify it under the terms of the GNU General Public License | ||
14 | * as published by the Free Software Foundation; either version 2 | ||
15 | * of the License, or (at your option) any later version. | ||
16 | * | ||
17 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
18 | * KIND, either express or implied. | ||
19 | * | ||
20 | ****************************************************************************/ | ||
21 | |||
22 | #include "config.h" | ||
23 | |||
24 | /* | ||
25 | * The following is an assembler optimised version of | ||
26 | * void hybrid_filter(fltst *fs, int *in) | ||
27 | */ | ||
28 | |||
29 | #if defined(USE_IRAM) | ||
30 | .section .icode | ||
31 | #else | ||
32 | .text | ||
33 | #endif | ||
34 | .align 2 | ||
35 | .global hybrid_filter | prototype in filter.h: int hybrid_filter(fltst *fs, int *in) | ||
36 | .type hybrid_filter, @function | ||
37 | |||
38 | hybrid_filter: | stack args on entry: 4(%sp) = fs, 8(%sp) = in | ||
39 | lea.l (-8*4, %sp), %sp | make room for 8 saved registers | ||
40 | movem.l %d2-%d7/%a2-%a3, (%sp) | save some registers | ||
41 | move.l (8*4+4, %sp), %a0 | a0 = fs | ||
42 | movem.l (%a0), %d4-%d5 | d4 = fs->index, d5 = fs->error | ||
43 | |||
44 | lea.l (%a0, %d4.l*4), %a2 | a2 = fs + fs->index * 4 bytes | ||
45 | lea.l (148, %a2), %a1 | a1 = fs->dl + fs->index (*pA) | ||
46 | lea.l (52, %a2), %a2 | a2 = fs->dx + fs->index (*pM) | ||
47 | |||
48 | move.l (%a1)+, %a3 | load one value from *pA (needed in every case) | ||
49 | movem.l (20, %a0), %d0-%d3 | load 4 values from *pB | ||
50 | |||
51 | tst.l %d5 | select a path by the sign of fs->error | ||
52 | blt .hf_negative | ||
53 | bgt .hf_positive | ||
54 | |||
55 | | fs->error == 0 | ||
56 | mac.l %d0, %a3, (%a1)+, %a3, %acc0 | acc0 += pB[0] * pA[0] while loading the next *pA into a3 | ||
57 | mac.l %d1, %a3, (%a1)+, %a3, %acc0 | ||
58 | mac.l %d2, %a3, (%a1)+, %a3, %acc0 | ||
59 | mac.l %d3, %a3, (%a1)+, %d4, %acc0 | the load gives d4 = pA[4]; fs->index in d4 is no longer needed | ||
60 | movem.l (4*4+20, %a0), %d0-%d3 | load 4 values from *pB | ||
61 | bra 0f | this path never writes *pB back | ||
62 | |||
63 | .hf_negative: | fs->error < 0 | ||
64 | movem.l (%a2), %d4-%d7 | load 4 values from *pM | ||
65 | sub.l %d4, %d0 | pB[0..3] -= pM[0..3] | ||
66 | sub.l %d5, %d1 | ||
67 | sub.l %d6, %d2 | ||
68 | sub.l %d7, %d3 | ||
69 | movem.l %d0-%d3, (20, %a0) | write updated pB[0..3] back | ||
70 | mac.l %d0, %a3, (%a1)+, %a3, %acc0 | acc0 += updated pB[0] * pA[0] while loading the next *pA | ||
71 | mac.l %d1, %a3, (%a1)+, %a3, %acc0 | ||
72 | mac.l %d2, %a3, (%a1)+, %a3, %acc0 | ||
73 | mac.l %d3, %a3, (%a1)+, %d4, %acc0 | the load gives d4 = pA[4] | ||
74 | |||
75 | movem.l (4*4+20, %a0), %d0-%d3 | load 4 values from *pB | ||
76 | movem.l (4*4, %a2), %d5-%d7/%a3 | load 4 values from *pM | ||
77 | sub.l %d5, %d0 | pB[4..7] -= pM[4..7] | ||
78 | sub.l %d6, %d1 | ||
79 | sub.l %d7, %d2 | ||
80 | sub.l %a3, %d3 | ||
81 | movem.l %d0-%d3, (4*4+20, %a0) | write updated pB[4..7] back | ||
82 | bra 0f | ||
83 | |||
84 | .hf_positive: | fs->error > 0 | ||
85 | movem.l (%a2), %d4-%d7 | load 4 values from *pM | ||
86 | add.l %d4, %d0 | pB[0..3] += pM[0..3] | ||
87 | add.l %d5, %d1 | ||
88 | add.l %d6, %d2 | ||
89 | add.l %d7, %d3 | ||
90 | movem.l %d0-%d3, (20, %a0) | write updated pB[0..3] back | ||
91 | mac.l %d0, %a3, (%a1)+, %a3, %acc0 | acc0 += updated pB[0] * pA[0] while loading the next *pA | ||
92 | mac.l %d1, %a3, (%a1)+, %a3, %acc0 | ||
93 | mac.l %d2, %a3, (%a1)+, %a3, %acc0 | ||
94 | mac.l %d3, %a3, (%a1)+, %d4, %acc0 | the load gives d4 = pA[4] | ||
95 | |||
96 | movem.l (4*4+20, %a0), %d0-%d3 | load 4 values from *pB | ||
97 | movem.l (4*4, %a2), %d5-%d7/%a3 | load 4 values from *pM | ||
98 | add.l %d5, %d0 | pB[4..7] += pM[4..7] | ||
99 | add.l %d6, %d1 | ||
100 | add.l %d7, %d2 | ||
101 | add.l %a3, %d3 | ||
102 | movem.l %d0-%d3, (4*4+20, %a0) | write updated pB[4..7] back | ||
103 | |||
104 | 0: | all paths arrive with d0-d3 = pB[4..7], d4 = pA[4], a1 at pA[5] | ||
105 | |||
106 | mac.l %d0, %d4, (%a1)+, %d5, %acc0 | common macro block | ||
107 | mac.l %d1, %d5, (%a1)+, %d6, %acc0 | acc0 += pB[5] * pA[5], d6 = pA[6] | ||
108 | mac.l %d2, %d6, (%a1), %d7, %acc0 | d7 = pA[7]; no post-increment, a1 stays at pA[7] | ||
109 | mac.l %d3, %d7, %acc0 | acc0 += pB[7] * pA[7] | ||
110 | |||
111 | move.l (8*4+8, %sp), %a3 | a3 = in | ||
112 | move.l (%a3), %d3 | d3 = *in | ||
113 | move.l %d3, (4, %a0) | fs->error = *in | ||
114 | movclr.l %acc0, %d0 | d0 = sum | ||
115 | movem.l (8, %a0), %d1-%d2 | d1 = fs->round, d2 = fs->shift | ||
116 | add.l %d1, %d0 | sum += fs->round | ||
117 | asr.l %d2, %d0 | sum >>= fs->shift | ||
118 | |||
119 | add.l %d0, %d3 | ||
120 | move.l %d3, (%a3) | *in += (sum >> fs->shift) | ||
121 | |||
122 | move.l %d3, ( 1*4, %a1) | pA[8] = new *in | ||
123 | sub.l %d7, %d3 | ||
124 | move.l %d3, ( 0*4, %a1) | pA[7] = pA[8] - old pA[7] | ||
125 | sub.l %d6, %d3 | ||
126 | move.l %d3, (-1*4, %a1) | pA[6] = new pA[7] - old pA[6] | ||
127 | sub.l %d5, %d3 | ||
128 | move.l %d3, (-2*4, %a1) | pA[5] = new pA[6] - old pA[5] | ||
129 | |||
130 | moveq #30,%d0 | arithmetic shift of old pA[4..7] (d4-d7) right by 30 | ||
131 | asr.l %d0,%d7 | ||
132 | asr.l %d0,%d6 | ||
133 | asr.l %d0,%d5 | ||
134 | asr.l %d0,%d4 | ||
135 | |||
136 | moveq #1,%d0 | or with 1: each of d4-d7 becomes +1 or -1 by sign | ||
137 | or.l %d0,%d7 | ||
138 | or.l %d0,%d6 | ||
139 | or.l %d0,%d5 | ||
140 | or.l %d0,%d4 | ||
141 | |||
142 | lsl.l #2,%d7 | scale: d7 by 4, d6 and d5 by 2, d4 unscaled | ||
143 | lsl.l #1,%d6 | ||
144 | lsl.l #1,%d5 | ||
145 | movem.l %d4-%d7, (8*4-3*4,%a2) | store to *pM | ||
146 | |||
147 | move.l (%a0), %d0 | d0 = fs->index | ||
148 | addq.l #1, %d0 | ||
149 | cmp.l #16, %d0 | ++fs->index == 16 ? | ||
150 | bne 1f | not 16 yet: only store the incremented index | ||
151 | |||
152 | movem.l (16*4+148, %a0), %d0-%d7 | index reached 16: copy fs->dl[16..23] ... | ||
153 | movem.l %d0-%d7, (148, %a0) | ... down to fs->dl[0..7] | ||
154 | movem.l (16*4+52, %a0), %d0-%d7 | copy fs->dx[16..23] ... | ||
155 | movem.l %d0-%d7, (52, %a0) | ... down to fs->dx[0..7] | ||
156 | clr.l %d0 | fs->index = 0 | ||
157 | 1: | ||
158 | |||
159 | move.l %d0, (%a0) | store the updated fs->index | ||
160 | |||
161 | movem.l (%sp), %d2-%d7/%a2-%a3 | restore stacked regs | ||
162 | lea.l (8*4, %sp), %sp | release the register save area | ||
163 | rts | d0 still holds the value just stored to fs->index | ||
164 | |||
diff --git a/apps/codecs/libtta/ttadec.c b/apps/codecs/libtta/ttadec.c index 2ff2d24da9..9d53a327f2 100644 --- a/apps/codecs/libtta/ttadec.c +++ b/apps/codecs/libtta/ttadec.c | |||
@@ -392,6 +392,10 @@ int player_init (tta_info *info) { | |||
392 | unsigned int data_offset; | 392 | unsigned int data_offset; |
393 | unsigned int st_size; | 393 | unsigned int st_size; |
394 | 394 | ||
395 | #ifdef CPU_COLDFIRE | ||
396 | coldfire_set_macsr(0); /* signed integer mode */ | ||
397 | #endif | ||
398 | |||
395 | ttainfo = info; | 399 | ttainfo = info; |
396 | 400 | ||
397 | framelen = 0; | 401 | framelen = 0; |