summary | refs | log | tree | commit | diff
path: root/apps/codecs/demac/libdemac/vector_math16_armv6.h
diff options
context:
space:
mode:
author: Jens Arnold <amiconn@rockbox.org> 2008-10-07 20:52:42 +0000
committer: Jens Arnold <amiconn@rockbox.org> 2008-10-07 20:52:42 +0000
commit: dd7cacdc884a1dcd79256760f1b5677e9a142490 (patch)
tree: 6150583630f57b5edcd9c42eede9042979c33bce /apps/codecs/demac/libdemac/vector_math16_armv6.h
parent: d07ac657a428754b177a894959fba03474b181a8 (diff)
download: rockbox-dd7cacdc884a1dcd79256760f1b5677e9a142490.tar.gz
download: rockbox-dd7cacdc884a1dcd79256760f1b5677e9a142490.zip
Another minor improvement: better pipelining and one fewer register used in vector addition/subtraction.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@18739 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs/demac/libdemac/vector_math16_armv6.h')
-rw-r--r-- apps/codecs/demac/libdemac/vector_math16_armv6.h 30
1 file changed, 16 insertions, 14 deletions
diff --git a/apps/codecs/demac/libdemac/vector_math16_armv6.h b/apps/codecs/demac/libdemac/vector_math16_armv6.h
index e180429193..49fa2ceb7d 100644
--- a/apps/codecs/demac/libdemac/vector_math16_armv6.h
+++ b/apps/codecs/demac/libdemac/vector_math16_armv6.h
@@ -39,37 +39,38 @@ static inline void vector_add(int16_t* v1, int16_t* v2)
39 39
40 "10: \n" 40 "10: \n"
41 "ldrh r4, [%[v2]], #2 \n" 41 "ldrh r4, [%[v2]], #2 \n"
42 "ldr r5, [%[v2]], #4 \n"
42 "mov r4, r4, lsl #16 \n" 43 "mov r4, r4, lsl #16 \n"
43 "1: \n" 44 "1: \n"
44 "ldmia %[v2]!, {r5-r8} \n" 45 "ldmia %[v2]!, {r6-r7} \n"
45 "ldmia %[v1], {r0-r3} \n" 46 "ldmia %[v1], {r0-r3} \n"
46 "mov r5, r5, ror #16 \n" 47 "mov r5, r5, ror #16 \n"
47 "pkhtb r4, r5, r4, asr #16 \n" 48 "pkhtb r4, r5, r4, asr #16 \n"
48 "sadd16 r0, r0, r4 \n" 49 "sadd16 r0, r0, r4 \n"
49 "pkhbt r5, r5, r6, lsl #16 \n" 50 "pkhbt r5, r5, r6, lsl #16 \n"
50 "sadd16 r1, r1, r5 \n" 51 "sadd16 r1, r1, r5 \n"
52 "ldmia %[v2]!, {r4-r5} \n"
51 "mov r7, r7, ror #16 \n" 53 "mov r7, r7, ror #16 \n"
52 "pkhtb r6, r7, r6, asr #16 \n" 54 "pkhtb r6, r7, r6, asr #16 \n"
53 "sadd16 r2, r2, r6 \n" 55 "sadd16 r2, r2, r6 \n"
54 "pkhbt r7, r7, r8, lsl #16 \n" 56 "pkhbt r7, r7, r4, lsl #16 \n"
55 "sadd16 r3, r3, r7 \n" 57 "sadd16 r3, r3, r7 \n"
56 "stmia %[v1]!, {r0-r3} \n" 58 "stmia %[v1]!, {r0-r3} \n"
57 "mov r4, r8 \n" 59 "ldmia %[v2]!, {r6-r7} \n"
58 "ldmia %[v2]!, {r5-r8} \n"
59 "ldmia %[v1], {r0-r3} \n" 60 "ldmia %[v1], {r0-r3} \n"
60 "mov r5, r5, ror #16 \n" 61 "mov r5, r5, ror #16 \n"
61 "pkhtb r4, r5, r4, asr #16 \n" 62 "pkhtb r4, r5, r4, asr #16 \n"
62 "sadd16 r0, r0, r4 \n" 63 "sadd16 r0, r0, r4 \n"
63 "pkhbt r5, r5, r6, lsl #16 \n" 64 "pkhbt r5, r5, r6, lsl #16 \n"
64 "sadd16 r1, r1, r5 \n" 65 "sadd16 r1, r1, r5 \n"
66 "ldmia %[v2]!, {r4-r5} \n"
65 "mov r7, r7, ror #16 \n" 67 "mov r7, r7, ror #16 \n"
66 "pkhtb r6, r7, r6, asr #16 \n" 68 "pkhtb r6, r7, r6, asr #16 \n"
67 "sadd16 r2, r2, r6 \n" 69 "sadd16 r2, r2, r6 \n"
68 "pkhbt r7, r7, r8, lsl #16 \n" 70 "pkhbt r7, r7, r4, lsl #16 \n"
69 "sadd16 r3, r3, r7 \n" 71 "sadd16 r3, r3, r7 \n"
70 "stmia %[v1]!, {r0-r3} \n" 72 "stmia %[v1]!, {r0-r3} \n"
71#if ORDER > 16 73#if ORDER > 16
72 "mov r4, r8 \n"
73 "subs %[cnt], %[cnt], #1 \n" 74 "subs %[cnt], %[cnt], #1 \n"
74 "bne 1b \n" 75 "bne 1b \n"
75#endif 76#endif
@@ -106,7 +107,7 @@ static inline void vector_add(int16_t* v1, int16_t* v2)
106 : /* inputs */ 107 : /* inputs */
107 : /* clobbers */ 108 : /* clobbers */
108 "r0", "r1", "r2", "r3", "r4", 109 "r0", "r1", "r2", "r3", "r4",
109 "r5", "r6", "r7", "r8", "memory" 110 "r5", "r6", "r7", "memory"
110 ); 111 );
111} 112}
112 113
@@ -125,37 +126,38 @@ static inline void vector_sub(int16_t* v1, int16_t* v2)
125 126
126 "10: \n" 127 "10: \n"
127 "ldrh r4, [%[v2]], #2 \n" 128 "ldrh r4, [%[v2]], #2 \n"
129 "ldr r5, [%[v2]], #4 \n"
128 "mov r4, r4, lsl #16 \n" 130 "mov r4, r4, lsl #16 \n"
129 "1: \n" 131 "1: \n"
130 "ldmia %[v2]!, {r5-r8} \n" 132 "ldmia %[v2]!, {r6-r7} \n"
131 "ldmia %[v1], {r0-r3} \n" 133 "ldmia %[v1], {r0-r3} \n"
132 "mov r5, r5, ror #16 \n" 134 "mov r5, r5, ror #16 \n"
133 "pkhtb r4, r5, r4, asr #16 \n" 135 "pkhtb r4, r5, r4, asr #16 \n"
134 "ssub16 r0, r0, r4 \n" 136 "ssub16 r0, r0, r4 \n"
135 "pkhbt r5, r5, r6, lsl #16 \n" 137 "pkhbt r5, r5, r6, lsl #16 \n"
136 "ssub16 r1, r1, r5 \n" 138 "ssub16 r1, r1, r5 \n"
139 "ldmia %[v2]!, {r4-r5} \n"
137 "mov r7, r7, ror #16 \n" 140 "mov r7, r7, ror #16 \n"
138 "pkhtb r6, r7, r6, asr #16 \n" 141 "pkhtb r6, r7, r6, asr #16 \n"
139 "ssub16 r2, r2, r6 \n" 142 "ssub16 r2, r2, r6 \n"
140 "pkhbt r7, r7, r8, lsl #16 \n" 143 "pkhbt r7, r7, r4, lsl #16 \n"
141 "ssub16 r3, r3, r7 \n" 144 "ssub16 r3, r3, r7 \n"
142 "stmia %[v1]!, {r0-r3} \n" 145 "stmia %[v1]!, {r0-r3} \n"
143 "mov r4, r8 \n" 146 "ldmia %[v2]!, {r6-r7} \n"
144 "ldmia %[v2]!, {r5-r8} \n"
145 "ldmia %[v1], {r0-r3} \n" 147 "ldmia %[v1], {r0-r3} \n"
146 "mov r5, r5, ror #16 \n" 148 "mov r5, r5, ror #16 \n"
147 "pkhtb r4, r5, r4, asr #16 \n" 149 "pkhtb r4, r5, r4, asr #16 \n"
148 "ssub16 r0, r0, r4 \n" 150 "ssub16 r0, r0, r4 \n"
149 "pkhbt r5, r5, r6, lsl #16 \n" 151 "pkhbt r5, r5, r6, lsl #16 \n"
150 "ssub16 r1, r1, r5 \n" 152 "ssub16 r1, r1, r5 \n"
153 "ldmia %[v2]!, {r4-r5} \n"
151 "mov r7, r7, ror #16 \n" 154 "mov r7, r7, ror #16 \n"
152 "pkhtb r6, r7, r6, asr #16 \n" 155 "pkhtb r6, r7, r6, asr #16 \n"
153 "ssub16 r2, r2, r6 \n" 156 "ssub16 r2, r2, r6 \n"
154 "pkhbt r7, r7, r8, lsl #16 \n" 157 "pkhbt r7, r7, r4, lsl #16 \n"
155 "ssub16 r3, r3, r7 \n" 158 "ssub16 r3, r3, r7 \n"
156 "stmia %[v1]!, {r0-r3} \n" 159 "stmia %[v1]!, {r0-r3} \n"
157#if ORDER > 16 160#if ORDER > 16
158 "mov r4, r8 \n"
159 "subs %[cnt], %[cnt], #1 \n" 161 "subs %[cnt], %[cnt], #1 \n"
160 "bne 1b \n" 162 "bne 1b \n"
161#endif 163#endif
@@ -192,7 +194,7 @@ static inline void vector_sub(int16_t* v1, int16_t* v2)
192 : /* inputs */ 194 : /* inputs */
193 : /* clobbers */ 195 : /* clobbers */
194 "r0", "r1", "r2", "r3", "r4", 196 "r0", "r1", "r2", "r3", "r4",
195 "r5", "r6", "r7", "r8", "memory" 197 "r5", "r6", "r7", "memory"
196 ); 198 );
197} 199}
198 200