diff options
author | Jens Arnold <amiconn@rockbox.org> | 2008-10-07 20:52:42 +0000 |
---|---|---|
committer | Jens Arnold <amiconn@rockbox.org> | 2008-10-07 20:52:42 +0000 |
commit | dd7cacdc884a1dcd79256760f1b5677e9a142490 (patch) | |
tree | 6150583630f57b5edcd9c42eede9042979c33bce /apps/codecs/demac/libdemac/vector_math16_armv6.h | |
parent | d07ac657a428754b177a894959fba03474b181a8 (diff) | |
download | rockbox-dd7cacdc884a1dcd79256760f1b5677e9a142490.tar.gz rockbox-dd7cacdc884a1dcd79256760f1b5677e9a142490.zip |
Another minor improvement: better pipelining and one fewer register used in vector addition/subtraction.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@18739 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs/demac/libdemac/vector_math16_armv6.h')
-rw-r--r-- | apps/codecs/demac/libdemac/vector_math16_armv6.h | 30 |
1 files changed, 16 insertions, 14 deletions
diff --git a/apps/codecs/demac/libdemac/vector_math16_armv6.h b/apps/codecs/demac/libdemac/vector_math16_armv6.h index e180429193..49fa2ceb7d 100644 --- a/apps/codecs/demac/libdemac/vector_math16_armv6.h +++ b/apps/codecs/demac/libdemac/vector_math16_armv6.h | |||
@@ -39,37 +39,38 @@ static inline void vector_add(int16_t* v1, int16_t* v2) | |||
39 | 39 | ||
40 | "10: \n" | 40 | "10: \n" |
41 | "ldrh r4, [%[v2]], #2 \n" | 41 | "ldrh r4, [%[v2]], #2 \n" |
42 | "ldr r5, [%[v2]], #4 \n" | ||
42 | "mov r4, r4, lsl #16 \n" | 43 | "mov r4, r4, lsl #16 \n" |
43 | "1: \n" | 44 | "1: \n" |
44 | "ldmia %[v2]!, {r5-r8} \n" | 45 | "ldmia %[v2]!, {r6-r7} \n" |
45 | "ldmia %[v1], {r0-r3} \n" | 46 | "ldmia %[v1], {r0-r3} \n" |
46 | "mov r5, r5, ror #16 \n" | 47 | "mov r5, r5, ror #16 \n" |
47 | "pkhtb r4, r5, r4, asr #16 \n" | 48 | "pkhtb r4, r5, r4, asr #16 \n" |
48 | "sadd16 r0, r0, r4 \n" | 49 | "sadd16 r0, r0, r4 \n" |
49 | "pkhbt r5, r5, r6, lsl #16 \n" | 50 | "pkhbt r5, r5, r6, lsl #16 \n" |
50 | "sadd16 r1, r1, r5 \n" | 51 | "sadd16 r1, r1, r5 \n" |
52 | "ldmia %[v2]!, {r4-r5} \n" | ||
51 | "mov r7, r7, ror #16 \n" | 53 | "mov r7, r7, ror #16 \n" |
52 | "pkhtb r6, r7, r6, asr #16 \n" | 54 | "pkhtb r6, r7, r6, asr #16 \n" |
53 | "sadd16 r2, r2, r6 \n" | 55 | "sadd16 r2, r2, r6 \n" |
54 | "pkhbt r7, r7, r8, lsl #16 \n" | 56 | "pkhbt r7, r7, r4, lsl #16 \n" |
55 | "sadd16 r3, r3, r7 \n" | 57 | "sadd16 r3, r3, r7 \n" |
56 | "stmia %[v1]!, {r0-r3} \n" | 58 | "stmia %[v1]!, {r0-r3} \n" |
57 | "mov r4, r8 \n" | 59 | "ldmia %[v2]!, {r6-r7} \n" |
58 | "ldmia %[v2]!, {r5-r8} \n" | ||
59 | "ldmia %[v1], {r0-r3} \n" | 60 | "ldmia %[v1], {r0-r3} \n" |
60 | "mov r5, r5, ror #16 \n" | 61 | "mov r5, r5, ror #16 \n" |
61 | "pkhtb r4, r5, r4, asr #16 \n" | 62 | "pkhtb r4, r5, r4, asr #16 \n" |
62 | "sadd16 r0, r0, r4 \n" | 63 | "sadd16 r0, r0, r4 \n" |
63 | "pkhbt r5, r5, r6, lsl #16 \n" | 64 | "pkhbt r5, r5, r6, lsl #16 \n" |
64 | "sadd16 r1, r1, r5 \n" | 65 | "sadd16 r1, r1, r5 \n" |
66 | "ldmia %[v2]!, {r4-r5} \n" | ||
65 | "mov r7, r7, ror #16 \n" | 67 | "mov r7, r7, ror #16 \n" |
66 | "pkhtb r6, r7, r6, asr #16 \n" | 68 | "pkhtb r6, r7, r6, asr #16 \n" |
67 | "sadd16 r2, r2, r6 \n" | 69 | "sadd16 r2, r2, r6 \n" |
68 | "pkhbt r7, r7, r8, lsl #16 \n" | 70 | "pkhbt r7, r7, r4, lsl #16 \n" |
69 | "sadd16 r3, r3, r7 \n" | 71 | "sadd16 r3, r3, r7 \n" |
70 | "stmia %[v1]!, {r0-r3} \n" | 72 | "stmia %[v1]!, {r0-r3} \n" |
71 | #if ORDER > 16 | 73 | #if ORDER > 16 |
72 | "mov r4, r8 \n" | ||
73 | "subs %[cnt], %[cnt], #1 \n" | 74 | "subs %[cnt], %[cnt], #1 \n" |
74 | "bne 1b \n" | 75 | "bne 1b \n" |
75 | #endif | 76 | #endif |
@@ -106,7 +107,7 @@ static inline void vector_add(int16_t* v1, int16_t* v2) | |||
106 | : /* inputs */ | 107 | : /* inputs */ |
107 | : /* clobbers */ | 108 | : /* clobbers */ |
108 | "r0", "r1", "r2", "r3", "r4", | 109 | "r0", "r1", "r2", "r3", "r4", |
109 | "r5", "r6", "r7", "r8", "memory" | 110 | "r5", "r6", "r7", "memory" |
110 | ); | 111 | ); |
111 | } | 112 | } |
112 | 113 | ||
@@ -125,37 +126,38 @@ static inline void vector_sub(int16_t* v1, int16_t* v2) | |||
125 | 126 | ||
126 | "10: \n" | 127 | "10: \n" |
127 | "ldrh r4, [%[v2]], #2 \n" | 128 | "ldrh r4, [%[v2]], #2 \n" |
129 | "ldr r5, [%[v2]], #4 \n" | ||
128 | "mov r4, r4, lsl #16 \n" | 130 | "mov r4, r4, lsl #16 \n" |
129 | "1: \n" | 131 | "1: \n" |
130 | "ldmia %[v2]!, {r5-r8} \n" | 132 | "ldmia %[v2]!, {r6-r7} \n" |
131 | "ldmia %[v1], {r0-r3} \n" | 133 | "ldmia %[v1], {r0-r3} \n" |
132 | "mov r5, r5, ror #16 \n" | 134 | "mov r5, r5, ror #16 \n" |
133 | "pkhtb r4, r5, r4, asr #16 \n" | 135 | "pkhtb r4, r5, r4, asr #16 \n" |
134 | "ssub16 r0, r0, r4 \n" | 136 | "ssub16 r0, r0, r4 \n" |
135 | "pkhbt r5, r5, r6, lsl #16 \n" | 137 | "pkhbt r5, r5, r6, lsl #16 \n" |
136 | "ssub16 r1, r1, r5 \n" | 138 | "ssub16 r1, r1, r5 \n" |
139 | "ldmia %[v2]!, {r4-r5} \n" | ||
137 | "mov r7, r7, ror #16 \n" | 140 | "mov r7, r7, ror #16 \n" |
138 | "pkhtb r6, r7, r6, asr #16 \n" | 141 | "pkhtb r6, r7, r6, asr #16 \n" |
139 | "ssub16 r2, r2, r6 \n" | 142 | "ssub16 r2, r2, r6 \n" |
140 | "pkhbt r7, r7, r8, lsl #16 \n" | 143 | "pkhbt r7, r7, r4, lsl #16 \n" |
141 | "ssub16 r3, r3, r7 \n" | 144 | "ssub16 r3, r3, r7 \n" |
142 | "stmia %[v1]!, {r0-r3} \n" | 145 | "stmia %[v1]!, {r0-r3} \n" |
143 | "mov r4, r8 \n" | 146 | "ldmia %[v2]!, {r6-r7} \n" |
144 | "ldmia %[v2]!, {r5-r8} \n" | ||
145 | "ldmia %[v1], {r0-r3} \n" | 147 | "ldmia %[v1], {r0-r3} \n" |
146 | "mov r5, r5, ror #16 \n" | 148 | "mov r5, r5, ror #16 \n" |
147 | "pkhtb r4, r5, r4, asr #16 \n" | 149 | "pkhtb r4, r5, r4, asr #16 \n" |
148 | "ssub16 r0, r0, r4 \n" | 150 | "ssub16 r0, r0, r4 \n" |
149 | "pkhbt r5, r5, r6, lsl #16 \n" | 151 | "pkhbt r5, r5, r6, lsl #16 \n" |
150 | "ssub16 r1, r1, r5 \n" | 152 | "ssub16 r1, r1, r5 \n" |
153 | "ldmia %[v2]!, {r4-r5} \n" | ||
151 | "mov r7, r7, ror #16 \n" | 154 | "mov r7, r7, ror #16 \n" |
152 | "pkhtb r6, r7, r6, asr #16 \n" | 155 | "pkhtb r6, r7, r6, asr #16 \n" |
153 | "ssub16 r2, r2, r6 \n" | 156 | "ssub16 r2, r2, r6 \n" |
154 | "pkhbt r7, r7, r8, lsl #16 \n" | 157 | "pkhbt r7, r7, r4, lsl #16 \n" |
155 | "ssub16 r3, r3, r7 \n" | 158 | "ssub16 r3, r3, r7 \n" |
156 | "stmia %[v1]!, {r0-r3} \n" | 159 | "stmia %[v1]!, {r0-r3} \n" |
157 | #if ORDER > 16 | 160 | #if ORDER > 16 |
158 | "mov r4, r8 \n" | ||
159 | "subs %[cnt], %[cnt], #1 \n" | 161 | "subs %[cnt], %[cnt], #1 \n" |
160 | "bne 1b \n" | 162 | "bne 1b \n" |
161 | #endif | 163 | #endif |
@@ -192,7 +194,7 @@ static inline void vector_sub(int16_t* v1, int16_t* v2) | |||
192 | : /* inputs */ | 194 | : /* inputs */ |
193 | : /* clobbers */ | 195 | : /* clobbers */ |
194 | "r0", "r1", "r2", "r3", "r4", | 196 | "r0", "r1", "r2", "r3", "r4", |
195 | "r5", "r6", "r7", "r8", "memory" | 197 | "r5", "r6", "r7", "memory" |
196 | ); | 198 | ); |
197 | } | 199 | } |
198 | 200 | ||