summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- apps/plugins/mpegplayer/motion_comp_arm_s.S 254
1 files changed, 136 insertions, 118 deletions
diff --git a/apps/plugins/mpegplayer/motion_comp_arm_s.S b/apps/plugins/mpegplayer/motion_comp_arm_s.S
index 36c3fec06a..fb29d59e99 100644
--- a/apps/plugins/mpegplayer/motion_comp_arm_s.S
+++ b/apps/plugins/mpegplayer/motion_comp_arm_s.S
@@ -20,6 +20,8 @@
20@ 20@
21@ $Id$ 21@ $Id$
22 22
23#include "config.h" /* Rockbox: ARM architecture version */
24
23 .text 25 .text
24 26
25@ ---------------------------------------------------------------- 27@ ----------------------------------------------------------------
@@ -28,11 +30,14 @@
28MC_put_o_16: 30MC_put_o_16:
29 @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) 31 @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height)
30 @@ pld [r1] 32 @@ pld [r1]
31 stmfd sp!, {r4-r11, lr} @ R14 is also called LR 33 stmfd sp!, {r4-r7, lr} @ R14 is also called LR
32 and r4, r1, #3 34 and r4, r1, #3
33 adr r5, MC_put_o_16_align_jt 35 ldr pc, [pc, r4, lsl #2]
34 add r5, r5, r4, lsl #2 36 .word 0
35 ldr pc, [r5] 37 .word MC_put_o_16_align0
38 .word MC_put_o_16_align1
39 .word MC_put_o_16_align2
40 .word MC_put_o_16_align3
36 41
37MC_put_o_16_align0: 42MC_put_o_16_align0:
38 ldmia r1, {r4-r7} 43 ldmia r1, {r4-r7}
@@ -42,45 +47,55 @@ MC_put_o_16_align0:
42 subs r3, r3, #1 47 subs r3, r3, #1
43 add r0, r0, r2 48 add r0, r0, r2
44 bne MC_put_o_16_align0 49 bne MC_put_o_16_align0
45 ldmfd sp!, {r4-r11, pc} @@ update PC with LR content. 50 ldmfd sp!, {r4-r7, pc} @@ update PC with LR content.
46 51
47.macro PROC shift 52.macro ADJ_ALIGN_QW shift, R0, R1, R2, R3, R4
48 ldmia r1, {r4-r8} 53 mov \R0, \R0, lsr #(\shift)
49 add r1, r1, r2 54 orr \R0, \R0, \R1, lsl #(32 - \shift)
50 mov r9, r4, lsr #(\shift) 55 mov \R1, \R1, lsr #(\shift)
51 @@ pld [r1] 56 orr \R1, \R1, \R2, lsl #(32 - \shift)
52 mov r10, r5, lsr #(\shift) 57 mov \R2, \R2, lsr #(\shift)
53 orr r9, r9, r5, lsl #(32-\shift) 58 orr \R2, \R2, \R3, lsl #(32 - \shift)
54 mov r11, r6, lsr #(\shift) 59 mov \R3, \R3, lsr #(\shift)
55 orr r10, r10, r6, lsl #(32-\shift) 60 orr \R3, \R3, \R4, lsl #(32 - \shift)
56 mov r12, r7, lsr #(\shift) 61 mov \R4, \R4, lsr #(\shift)
57 orr r11, r11, r7, lsl #(32-\shift)
58 orr r12, r12, r8, lsl #(32-\shift)
59 stmia r0, {r9-r12}
60 subs r3, r3, #1
61 add r0, r0, r2
62.endm 62.endm
63 63
64MC_put_o_16_align1: 64MC_put_o_16_align1:
65 and r1, r1, #0xFFFFFFFC 65 and r1, r1, #0xFFFFFFFC
661: PROC(8) 661: ldmia r1, {r4-r7, r12}
67 add r1, r1, r2
68 @@ pld [r1]
69 ADJ_ALIGN_QW 8, r4, r5, r6, r7, r12
70 stmia r0, {r4-r7}
71 subs r3, r3, #1
72 add r0, r0, r2
67 bne 1b 73 bne 1b
68 ldmfd sp!, {r4-r11, pc} @@ update PC with LR content. 74 ldmfd sp!, {r4-r7, pc} @@ update PC with LR content.
75
69MC_put_o_16_align2: 76MC_put_o_16_align2:
70 and r1, r1, #0xFFFFFFFC 77 and r1, r1, #0xFFFFFFFC
711: PROC(16) 781: ldmia r1, {r4-r7, r12}
79 add r1, r1, r2
80 @@ pld [r1]
81 ADJ_ALIGN_QW 16, r4, r5, r6, r7, r12
82 stmia r0, {r4-r7}
83 subs r3, r3, #1
84 add r0, r0, r2
72 bne 1b 85 bne 1b
73 ldmfd sp!, {r4-r11, pc} @@ update PC with LR content. 86 ldmfd sp!, {r4-r7, pc} @@ update PC with LR content.
87
74MC_put_o_16_align3: 88MC_put_o_16_align3:
75 and r1, r1, #0xFFFFFFFC 89 and r1, r1, #0xFFFFFFFC
761: PROC(24) 901: ldmia r1, {r4-r7, r12}
91 add r1, r1, r2
92 @@ pld [r1]
93 ADJ_ALIGN_QW 24, r4, r5, r6, r7, r12
94 stmia r0, {r4-r7}
95 subs r3, r3, #1
96 add r0, r0, r2
77 bne 1b 97 bne 1b
78 ldmfd sp!, {r4-r11, pc} @@ update PC with LR content. 98 ldmfd sp!, {r4-r7, pc} @@ update PC with LR content.
79MC_put_o_16_align_jt:
80 .word MC_put_o_16_align0
81 .word MC_put_o_16_align1
82 .word MC_put_o_16_align2
83 .word MC_put_o_16_align3
84 99
85@ ---------------------------------------------------------------- 100@ ----------------------------------------------------------------
86 .align 101 .align
@@ -88,95 +103,108 @@ MC_put_o_16_align_jt:
88MC_put_o_8: 103MC_put_o_8:
89 @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) 104 @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height)
90 @@ pld [r1] 105 @@ pld [r1]
91 stmfd sp!, {r4-r10, lr} @ R14 is also called LR 106 stmfd sp!, {r4, r5, lr} @ R14 is also called LR
92 and r4, r1, #3 107 and r4, r1, #3
93 adr r5, MC_put_o_8_align_jt 108 ldr pc, [pc, r4, lsl #2]
94 add r5, r5, r4, lsl #2 109 .word 0
95 ldr pc, [r5] 110 .word MC_put_o_8_align0
111 .word MC_put_o_8_align1
112 .word MC_put_o_8_align2
113 .word MC_put_o_8_align3
114
96MC_put_o_8_align0: 115MC_put_o_8_align0:
97 ldmia r1, {r4-r5} 116 ldmia r1, {r4, r5}
98 add r1, r1, r2 117 add r1, r1, r2
99 @@ pld [r1] 118 @@ pld [r1]
100 stmia r0, {r4-r5} 119 stmia r0, {r4, r5}
101 add r0, r0, r2 120 add r0, r0, r2
102 subs r3, r3, #1 121 subs r3, r3, #1
103 bne MC_put_o_8_align0 122 bne MC_put_o_8_align0
104 ldmfd sp!, {r4-r10, pc} @@ update PC with LR content. 123 ldmfd sp!, {r4, r5, pc} @@ update PC with LR content.
105 124
106.macro PROC8 shift 125.macro ADJ_ALIGN_DW shift, R0, R1, R2
107 ldmia r1, {r4-r6} 126 mov \R0, \R0, lsr #(\shift)
108 add r1, r1, r2 127 orr \R0, \R0, \R1, lsl #(32 - \shift)
109 mov r9, r4, lsr #(\shift) 128 mov \R1, \R1, lsr #(\shift)
110 @@ pld [r1] 129 orr \R1, \R1, \R2, lsl #(32 - \shift)
111 mov r10, r5, lsr #(\shift) 130 mov \R2, \R2, lsr #(\shift)
112 orr r9, r9, r5, lsl #(32-\shift)
113 orr r10, r10, r6, lsl #(32-\shift)
114 stmia r0, {r9-r10}
115 subs r3, r3, #1
116 add r0, r0, r2
117.endm 131.endm
118 132
119MC_put_o_8_align1: 133MC_put_o_8_align1:
120 and r1, r1, #0xFFFFFFFC 134 and r1, r1, #0xFFFFFFFC
1211: PROC8(8) 1351: ldmia r1, {r4, r5, r12}
136 add r1, r1, r2
137 @@ pld [r1]
138 ADJ_ALIGN_DW 8, r4, r5, r12
139 stmia r0, {r4, r5}
140 subs r3, r3, #1
141 add r0, r0, r2
122 bne 1b 142 bne 1b
123 ldmfd sp!, {r4-r10, pc} @@ update PC with LR content. 143 ldmfd sp!, {r4, r5, pc} @@ update PC with LR content.
124 144
125MC_put_o_8_align2: 145MC_put_o_8_align2:
126 and r1, r1, #0xFFFFFFFC 146 and r1, r1, #0xFFFFFFFC
1271: PROC8(16) 1471: ldmia r1, {r4, r5, r12}
148 add r1, r1, r2
149 @@ pld [r1]
150 ADJ_ALIGN_DW 16, r4, r5, r12
151 stmia r0, {r4, r5}
152 subs r3, r3, #1
153 add r0, r0, r2
128 bne 1b 154 bne 1b
129 ldmfd sp!, {r4-r10, pc} @@ update PC with LR content. 155 ldmfd sp!, {r4, r5, pc} @@ update PC with LR content.
130 156
131MC_put_o_8_align3: 157MC_put_o_8_align3:
132 and r1, r1, #0xFFFFFFFC 158 and r1, r1, #0xFFFFFFFC
1331: PROC8(24) 1591: ldmia r1, {r4, r5, r12}
160 add r1, r1, r2
161 @@ pld [r1]
162 ADJ_ALIGN_DW 24, r4, r5, r12
163 stmia r0, {r4, r5}
164 subs r3, r3, #1
165 add r0, r0, r2
134 bne 1b 166 bne 1b
135 ldmfd sp!, {r4-r10, pc} @@ update PC with LR content. 167 ldmfd sp!, {r4, r5, pc} @@ update PC with LR content.
136
137MC_put_o_8_align_jt:
138 .word MC_put_o_8_align0
139 .word MC_put_o_8_align1
140 .word MC_put_o_8_align2
141 .word MC_put_o_8_align3
142 168
143@ ---------------------------------------------------------------- 169@ ----------------------------------------------------------------
144.macro AVG_PW rW1, rW2 170.macro AVG_PW rW1, rW2
145 mov \rW2, \rW2, lsl #24 171 mov \rW2, \rW2, lsl #24
146 orr \rW2, \rW2, \rW1, lsr #8 172 orr \rW2, \rW2, \rW1, lsr #8
147 eor r9, \rW1, \rW2 173 eor r9, \rW1, \rW2
174#if ARM_ARCH >= 6
175 uhadd8 \rW2, \rW1, \rW2
176#else
148 and \rW2, \rW1, \rW2 177 and \rW2, \rW1, \rW2
149 and r10, r9, r12
150 add \rW2, \rW2, r10, lsr #1
151 and r10, r9, r11 178 and r10, r9, r11
152 add \rW2, \rW2, r10 179 add \rW2, \rW2, r10, lsr #1
180#endif
181 and r9, r9, r12
182 add \rW2, \rW2, r9
153.endm 183.endm
154 184
185#if ARM_ARCH >= 6
186#define HIGH_REGS r9
187#else
188#define HIGH_REGS r9-r11
189#endif
190
155 .align 191 .align
156 .global MC_put_x_16 192 .global MC_put_x_16
157MC_put_x_16: 193MC_put_x_16:
158 @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) 194 @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height)
159 @@ pld [r1] 195 @@ pld [r1]
160 stmfd sp!, {r4-r11,lr} @ R14 is also called LR 196 stmfd sp!, {r4-r8, HIGH_REGS, lr} @ R14 is also called LR
161 and r4, r1, #3 197 and r4, r1, #3
162 adr r5, MC_put_x_16_align_jt 198 ldr r12, 2f
163 ldr r11, [r5] 199#if ARM_ARCH < 6
164 mvn r12, r11 200 mvn r11, r12
165 add r5, r5, r4, lsl #2 201#endif
166 ldr pc, [r5, #4] 202 ldr pc, [pc, r4, lsl #2]
167 2032: .word 0x01010101
168.macro ADJ_ALIGN_QW shift, R0, R1, R2, R3, R4 204 .word MC_put_x_16_align0
169 mov \R0, \R0, lsr #(\shift) 205 .word MC_put_x_16_align1
170 orr \R0, \R0, \R1, lsl #(32 - \shift) 206 .word MC_put_x_16_align2
171 mov \R1, \R1, lsr #(\shift) 207 .word MC_put_x_16_align3
172 orr \R1, \R1, \R2, lsl #(32 - \shift)
173 mov \R2, \R2, lsr #(\shift)
174 orr \R2, \R2, \R3, lsl #(32 - \shift)
175 mov \R3, \R3, lsr #(\shift)
176 orr \R3, \R3, \R4, lsl #(32 - \shift)
177 mov \R4, \R4, lsr #(\shift)
178@ and \R4, \R4, #0xFF
179.endm
180 208
181MC_put_x_16_align0: 209MC_put_x_16_align0:
182 ldmia r1, {r4-r8} 210 ldmia r1, {r4-r8}
@@ -190,7 +218,8 @@ MC_put_x_16_align0:
190 subs r3, r3, #1 218 subs r3, r3, #1
191 add r0, r0, r2 219 add r0, r0, r2
192 bne MC_put_x_16_align0 220 bne MC_put_x_16_align0
193 ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. 221 ldmfd sp!, {r4-r8, HIGH_REGS, pc} @@ update PC with LR content.
222
194MC_put_x_16_align1: 223MC_put_x_16_align1:
195 and r1, r1, #0xFFFFFFFC 224 and r1, r1, #0xFFFFFFFC
1961: ldmia r1, {r4-r8} 2251: ldmia r1, {r4-r8}
@@ -205,7 +234,8 @@ MC_put_x_16_align1:
205 subs r3, r3, #1 234 subs r3, r3, #1
206 add r0, r0, r2 235 add r0, r0, r2
207 bne 1b 236 bne 1b
208 ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. 237 ldmfd sp!, {r4-r8, HIGH_REGS, pc} @@ update PC with LR content.
238
209MC_put_x_16_align2: 239MC_put_x_16_align2:
210 and r1, r1, #0xFFFFFFFC 240 and r1, r1, #0xFFFFFFFC
2111: ldmia r1, {r4-r8} 2411: ldmia r1, {r4-r8}
@@ -220,7 +250,8 @@ MC_put_x_16_align2:
220 subs r3, r3, #1 250 subs r3, r3, #1
221 add r0, r0, r2 251 add r0, r0, r2
222 bne 1b 252 bne 1b
223 ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. 253 ldmfd sp!, {r4-r8, HIGH_REGS, pc} @@ update PC with LR content.
254
224MC_put_x_16_align3: 255MC_put_x_16_align3:
225 and r1, r1, #0xFFFFFFFC 256 and r1, r1, #0xFFFFFFFC
2261: ldmia r1, {r4-r8} 2571: ldmia r1, {r4-r8}
@@ -235,13 +266,7 @@ MC_put_x_16_align3:
235 subs r3, r3, #1 266 subs r3, r3, #1
236 add r0, r0, r2 267 add r0, r0, r2
237 bne 1b 268 bne 1b
238 ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. 269 ldmfd sp!, {r4-r8, HIGH_REGS, pc} @@ update PC with LR content.
239MC_put_x_16_align_jt:
240 .word 0x01010101
241 .word MC_put_x_16_align0
242 .word MC_put_x_16_align1
243 .word MC_put_x_16_align2
244 .word MC_put_x_16_align3
245 270
246@ ---------------------------------------------------------------- 271@ ----------------------------------------------------------------
247 .align 272 .align
@@ -249,22 +274,18 @@ MC_put_x_16_align_jt:
249MC_put_x_8: 274MC_put_x_8:
250 @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) 275 @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height)
251 @@ pld [r1] 276 @@ pld [r1]
252 stmfd sp!, {r4-r11,lr} @ R14 is also called LR 277 stmfd sp!, {r4-r6, HIGH_REGS, lr} @ R14 is also called LR
253 and r4, r1, #3 278 and r4, r1, #3
254 adr r5, MC_put_x_8_align_jt 279 ldr r12, 2f
255 ldr r11, [r5] 280#if ARM_ARCH < 6
256 mvn r12, r11 281 mvn r11, r12
257 add r5, r5, r4, lsl #2 282#endif
258 ldr pc, [r5, #4] 283 ldr pc, [pc, r4, lsl #2]
259 2842: .word 0x01010101
260.macro ADJ_ALIGN_DW shift, R0, R1, R2 285 .word MC_put_x_8_align0
261 mov \R0, \R0, lsr #(\shift) 286 .word MC_put_x_8_align1
262 orr \R0, \R0, \R1, lsl #(32 - \shift) 287 .word MC_put_x_8_align2
263 mov \R1, \R1, lsr #(\shift) 288 .word MC_put_x_8_align3
264 orr \R1, \R1, \R2, lsl #(32 - \shift)
265 mov \R2, \R2, lsr #(\shift)
266@ and \R4, \R4, #0xFF
267.endm
268 289
269MC_put_x_8_align0: 290MC_put_x_8_align0:
270 ldmia r1, {r4-r6} 291 ldmia r1, {r4-r6}
@@ -276,7 +297,8 @@ MC_put_x_8_align0:
276 subs r3, r3, #1 297 subs r3, r3, #1
277 add r0, r0, r2 298 add r0, r0, r2
278 bne MC_put_x_8_align0 299 bne MC_put_x_8_align0
279 ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. 300 ldmfd sp!, {r4-r6, HIGH_REGS, pc} @@ update PC with LR content.
301
280MC_put_x_8_align1: 302MC_put_x_8_align1:
281 and r1, r1, #0xFFFFFFFC 303 and r1, r1, #0xFFFFFFFC
2821: ldmia r1, {r4-r6} 3041: ldmia r1, {r4-r6}
@@ -289,7 +311,8 @@ MC_put_x_8_align1:
289 subs r3, r3, #1 311 subs r3, r3, #1
290 add r0, r0, r2 312 add r0, r0, r2
291 bne 1b 313 bne 1b
292 ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. 314 ldmfd sp!, {r4-r6, HIGH_REGS, pc} @@ update PC with LR content.
315
293MC_put_x_8_align2: 316MC_put_x_8_align2:
294 and r1, r1, #0xFFFFFFFC 317 and r1, r1, #0xFFFFFFFC
2951: ldmia r1, {r4-r6} 3181: ldmia r1, {r4-r6}
@@ -302,7 +325,8 @@ MC_put_x_8_align2:
302 subs r3, r3, #1 325 subs r3, r3, #1
303 add r0, r0, r2 326 add r0, r0, r2
304 bne 1b 327 bne 1b
305 ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. 328 ldmfd sp!, {r4-r6, HIGH_REGS, pc} @@ update PC with LR content.
329
306MC_put_x_8_align3: 330MC_put_x_8_align3:
307 and r1, r1, #0xFFFFFFFC 331 and r1, r1, #0xFFFFFFFC
3081: ldmia r1, {r4-r6} 3321: ldmia r1, {r4-r6}
@@ -315,10 +339,4 @@ MC_put_x_8_align3:
315 subs r3, r3, #1 339 subs r3, r3, #1
316 add r0, r0, r2 340 add r0, r0, r2
317 bne 1b 341 bne 1b
318 ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. 342 ldmfd sp!, {r4-r6, HIGH_REGS, pc} @@ update PC with LR content.
319MC_put_x_8_align_jt:
320 .word 0x01010101
321 .word MC_put_x_8_align0
322 .word MC_put_x_8_align1
323 .word MC_put_x_8_align2
324 .word MC_put_x_8_align3