diff options
author | Chris Chua <someone42@gmail.com> | 2023-03-19 06:22:08 +1100 |
---|---|---|
committer | Aidan MacDonald <amachronic@protonmail.com> | 2023-03-23 13:28:22 -0400 |
commit | 86429dbf1eca8ee0e08176997f508647c3abf6bd (patch) | |
tree | 4d35e56e338a326d1a04c8d1f620821fa7909678 /lib/rbcodec | |
parent | a64cad847e7d24dc4d01d5ab22f6c8dc42f960ae (diff) | |
download | rockbox-86429dbf1eca8ee0e08176997f508647c3abf6bd.tar.gz rockbox-86429dbf1eca8ee0e08176997f508647c3abf6bd.zip |
Using ARM Unified Assembler Language
Change-Id: Iae32a8ba8eff6087330e458fafc912a12fee4509
Diffstat (limited to 'lib/rbcodec')
-rw-r--r-- | lib/rbcodec/codecs/demac/libdemac/udiv32_arm.S | 2 | ||||
-rw-r--r-- | lib/rbcodec/codecs/demac/libdemac/vector_math16_armv6.h | 23 | ||||
-rw-r--r-- | lib/rbcodec/codecs/libtta/filter_arm.S | 4 | ||||
-rw-r--r-- | lib/rbcodec/dsp/dsp_arm.S | 6 | ||||
-rw-r--r-- | lib/rbcodec/dsp/dsp_arm_v6.S | 5 |
5 files changed, 22 insertions, 18 deletions
diff --git a/lib/rbcodec/codecs/demac/libdemac/udiv32_arm.S b/lib/rbcodec/codecs/demac/libdemac/udiv32_arm.S index 7b851659bd..1d19160a91 100644 --- a/lib/rbcodec/codecs/demac/libdemac/udiv32_arm.S +++ b/lib/rbcodec/codecs/demac/libdemac/udiv32_arm.S | |||
@@ -225,7 +225,7 @@ udiv32_arm: | |||
225 | mov \inv, \divisor, lsl \bits | 225 | mov \inv, \divisor, lsl \bits |
226 | add \neg, pc, \inv, lsr #25 | 226 | add \neg, pc, \inv, lsr #25 |
227 | cmp \inv, #1<<31 | 227 | cmp \inv, #1<<31 |
228 | ldrhib \inv, [\neg, #.L_udiv_est_table-.-64] | 228 | ldrbhi \inv, [\neg, #.L_udiv_est_table-.-64] |
229 | bls 20f | 229 | bls 20f |
230 | subs \bits, \bits, #7 | 230 | subs \bits, \bits, #7 |
231 | rsb \neg, \divisor, #0 | 231 | rsb \neg, \divisor, #0 |
diff --git a/lib/rbcodec/codecs/demac/libdemac/vector_math16_armv6.h b/lib/rbcodec/codecs/demac/libdemac/vector_math16_armv6.h index 8d27331b62..1da090efbb 100644 --- a/lib/rbcodec/codecs/demac/libdemac/vector_math16_armv6.h +++ b/lib/rbcodec/codecs/demac/libdemac/vector_math16_armv6.h | |||
@@ -45,6 +45,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2) | |||
45 | #endif | 45 | #endif |
46 | 46 | ||
47 | asm volatile ( | 47 | asm volatile ( |
48 | ".syntax unified \n" | ||
48 | #if ORDER > 32 | 49 | #if ORDER > 32 |
49 | "mov %[res], #0 \n" | 50 | "mov %[res], #0 \n" |
50 | #endif | 51 | #endif |
@@ -117,7 +118,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2) | |||
117 | "smladx %[res], r1, r2, %[res] \n" | 118 | "smladx %[res], r1, r2, %[res] \n" |
118 | #if ORDER > 32 | 119 | #if ORDER > 32 |
119 | "subs %[cnt], %[cnt], #1 \n" | 120 | "subs %[cnt], %[cnt], #1 \n" |
120 | "ldmneia %[f2]!, {r2,r4} \n" | 121 | "ldmiane %[f2]!, {r2,r4} \n" |
121 | "sadd16 r0, r0, r7 \n" | 122 | "sadd16 r0, r0, r7 \n" |
122 | "sadd16 r1, r1, r5 \n" | 123 | "sadd16 r1, r1, r5 \n" |
123 | "strd r0, [%[v1]], #8 \n" | 124 | "strd r0, [%[v1]], #8 \n" |
@@ -172,8 +173,8 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2) | |||
172 | "smlad %[res], r3, r5, %[res] \n" | 173 | "smlad %[res], r3, r5, %[res] \n" |
173 | #if ORDER > 32 | 174 | #if ORDER > 32 |
174 | "subs %[cnt], %[cnt], #1 \n" | 175 | "subs %[cnt], %[cnt], #1 \n" |
175 | "ldrned r4, [%[f2]], #8 \n" | 176 | "ldrdne r4, [%[f2]], #8 \n" |
176 | "ldrned r0, [%[v1], #8] \n" | 177 | "ldrdne r0, [%[v1], #8] \n" |
177 | "sadd16 r2, r2, r6 \n" | 178 | "sadd16 r2, r2, r6 \n" |
178 | "sadd16 r3, r3, r7 \n" | 179 | "sadd16 r3, r3, r7 \n" |
179 | "strd r2, [%[v1]], #8 \n" | 180 | "strd r2, [%[v1]], #8 \n" |
@@ -214,6 +215,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2) | |||
214 | #endif | 215 | #endif |
215 | 216 | ||
216 | asm volatile ( | 217 | asm volatile ( |
218 | ".syntax unified \n" | ||
217 | #if ORDER > 32 | 219 | #if ORDER > 32 |
218 | "mov %[res], #0 \n" | 220 | "mov %[res], #0 \n" |
219 | #endif | 221 | #endif |
@@ -286,7 +288,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2) | |||
286 | "smladx %[res], r1, r2, %[res] \n" | 288 | "smladx %[res], r1, r2, %[res] \n" |
287 | #if ORDER > 32 | 289 | #if ORDER > 32 |
288 | "subs %[cnt], %[cnt], #1 \n" | 290 | "subs %[cnt], %[cnt], #1 \n" |
289 | "ldmneia %[f2]!, {r2,r4} \n" | 291 | "ldmiane %[f2]!, {r2,r4} \n" |
290 | "ssub16 r0, r0, r7 \n" | 292 | "ssub16 r0, r0, r7 \n" |
291 | "ssub16 r1, r1, r5 \n" | 293 | "ssub16 r1, r1, r5 \n" |
292 | "strd r0, [%[v1]], #8 \n" | 294 | "strd r0, [%[v1]], #8 \n" |
@@ -341,8 +343,8 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2) | |||
341 | "smlad %[res], r3, r5, %[res] \n" | 343 | "smlad %[res], r3, r5, %[res] \n" |
342 | #if ORDER > 32 | 344 | #if ORDER > 32 |
343 | "subs %[cnt], %[cnt], #1 \n" | 345 | "subs %[cnt], %[cnt], #1 \n" |
344 | "ldrned r4, [%[f2]], #8 \n" | 346 | "ldrdne r4, [%[f2]], #8 \n" |
345 | "ldrned r0, [%[v1], #8] \n" | 347 | "ldrdne r0, [%[v1], #8] \n" |
346 | "ssub16 r2, r2, r6 \n" | 348 | "ssub16 r2, r2, r6 \n" |
347 | "ssub16 r3, r3, r7 \n" | 349 | "ssub16 r3, r3, r7 \n" |
348 | "strd r2, [%[v1]], #8 \n" | 350 | "strd r2, [%[v1]], #8 \n" |
@@ -381,6 +383,7 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) | |||
381 | #endif | 383 | #endif |
382 | 384 | ||
383 | asm volatile ( | 385 | asm volatile ( |
386 | ".syntax unified \n" | ||
384 | #if ORDER > 32 | 387 | #if ORDER > 32 |
385 | "mov %[res], #0 \n" | 388 | "mov %[res], #0 \n" |
386 | #endif | 389 | #endif |
@@ -421,10 +424,10 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) | |||
421 | "pkhtb r1, r7, r4 \n" | 424 | "pkhtb r1, r7, r4 \n" |
422 | #if ORDER > 32 | 425 | #if ORDER > 32 |
423 | "subs %[cnt], %[cnt], #1 \n" | 426 | "subs %[cnt], %[cnt], #1 \n" |
424 | "ldrned r6, [%[v2]], #8 \n" | 427 | "ldrdne r6, [%[v2]], #8 \n" |
425 | "smladx %[res], r2, r1, %[res] \n" | 428 | "smladx %[res], r2, r1, %[res] \n" |
426 | "pkhtb r2, r4, r5 \n" | 429 | "pkhtb r2, r4, r5 \n" |
427 | "ldrned r0, [%[v1]], #8 \n" | 430 | "ldrdne r0, [%[v1]], #8 \n" |
428 | "smladx %[res], r3, r2, %[res] \n" | 431 | "smladx %[res], r3, r2, %[res] \n" |
429 | "bne 1b \n" | 432 | "bne 1b \n" |
430 | #else | 433 | #else |
@@ -461,9 +464,9 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) | |||
461 | "ldrd r4, [%[v2]], #8 \n" | 464 | "ldrd r4, [%[v2]], #8 \n" |
462 | "smlad %[res], r1, r6, %[res] \n" | 465 | "smlad %[res], r1, r6, %[res] \n" |
463 | "subs %[cnt], %[cnt], #1 \n" | 466 | "subs %[cnt], %[cnt], #1 \n" |
464 | "ldrned r0, [%[v1]], #8 \n" | 467 | "ldrdne r0, [%[v1]], #8 \n" |
465 | "smlad %[res], r2, r7, %[res] \n" | 468 | "smlad %[res], r2, r7, %[res] \n" |
466 | "ldrned r6, [%[v2]], #8 \n" | 469 | "ldrdne r6, [%[v2]], #8 \n" |
467 | "smlad %[res], r3, r4, %[res] \n" | 470 | "smlad %[res], r3, r4, %[res] \n" |
468 | "bne 1b \n" | 471 | "bne 1b \n" |
469 | #else | 472 | #else |
diff --git a/lib/rbcodec/codecs/libtta/filter_arm.S b/lib/rbcodec/codecs/libtta/filter_arm.S index f3959b83ca..10f1491796 100644 --- a/lib/rbcodec/codecs/libtta/filter_arm.S +++ b/lib/rbcodec/codecs/libtta/filter_arm.S | |||
@@ -165,8 +165,8 @@ hybrid_filter: | |||
165 | sub r10, r11, r10 | 165 | sub r10, r11, r10 |
166 | 166 | ||
167 | @ set to the memory: *pA, *(pA-1), *(pA-2), *(pA-3), *pM, *(pM-1), *(pM-2), *(pM-3) | 167 | @ set to the memory: *pA, *(pA-1), *(pA-2), *(pA-3), *pM, *(pM-1), *(pM-2), *(pM-3) |
168 | stmneda r2, {r10, r11, r12, lr} | 168 | stmdane r2, {r10, r11, r12, lr} |
169 | stmneda r3, {r5, r6, r7, r8} | 169 | stmdane r3, {r5, r6, r7, r8} |
170 | ldmpc cond=ne regs=r4-r12 @ hybrid_filter end (when fs->index != 0) | 170 | ldmpc cond=ne regs=r4-r12 @ hybrid_filter end (when fs->index != 0) |
171 | 171 | ||
172 | .hf_memshl: | 172 | .hf_memshl: |
diff --git a/lib/rbcodec/dsp/dsp_arm.S b/lib/rbcodec/dsp/dsp_arm.S index 16394b8690..864abee4b6 100644 --- a/lib/rbcodec/dsp/dsp_arm.S +++ b/lib/rbcodec/dsp/dsp_arm.S | |||
@@ -323,7 +323,7 @@ resample_hermite: | |||
323 | add r6, r6, r0, lsl #2 @ r6 = &s[pos] | 323 | add r6, r6, r0, lsl #2 @ r6 = &s[pos] |
324 | 324 | ||
325 | cmp r0, #3 @ pos >= 3? history not needed | 325 | cmp r0, #3 @ pos >= 3? history not needed |
326 | ldmgedb r6, { r1-r3 } @ x3..x1 = s[pos-3]..s[pos-1] | 326 | ldmdbge r6, { r1-r3 } @ x3..x1 = s[pos-3]..s[pos-1] |
327 | bge .hrs_loadhist_done @ | 327 | bge .hrs_loadhist_done @ |
328 | add r10, r0, r0, lsl #1 @ branch pc + pos*12 | 328 | add r10, r0, r0, lsl #1 @ branch pc + pos*12 |
329 | add pc, pc, r10, lsl #2 @ | 329 | add pc, pc, r10, lsl #2 @ |
@@ -496,7 +496,7 @@ resample_hermite: | |||
496 | ldmfd sp!, { r10, r12 } @ recover ch, h | 496 | ldmfd sp!, { r10, r12 } @ recover ch, h |
497 | subs r10, r10, #1 @ --ch | 497 | subs r10, r10, #1 @ --ch |
498 | stmia r12!, { r1-r3 } @ h[0..2] = x3..x1 | 498 | stmia r12!, { r1-r3 } @ h[0..2] = x3..x1 |
499 | ldmgtia sp, { r0-r2 } @ load data, src, dst | 499 | ldmiagt sp, { r0-r2 } @ load data, src, dst |
500 | bgt .hrs_channel_loop | 500 | bgt .hrs_channel_loop |
501 | 501 | ||
502 | ldmfd sp!, { r1-r3 } @ pop data, src, dst | 502 | ldmfd sp!, { r1-r3 } @ pop data, src, dst |
@@ -614,7 +614,7 @@ filter_process: | |||
614 | ldr r0, [sp] @ r0 = history[channels-ch-1] | 614 | ldr r0, [sp] @ r0 = history[channels-ch-1] |
615 | subs r3, r3, #1 @ all channels processed? | 615 | subs r3, r3, #1 @ all channels processed? |
616 | stmia r0!, { r9-r12 } @ save back history, history++ | 616 | stmia r0!, { r9-r12 } @ save back history, history++ |
617 | ldmhsib sp, { r1-r2 } @ r1 = buf, r2 = count | 617 | ldmibhs sp, { r1-r2 } @ r1 = buf, r2 = count |
618 | strhs r3, [sp, #12] @ store ch | 618 | strhs r3, [sp, #12] @ store ch |
619 | strhs r0, [sp] @ store history[channels-ch-1] | 619 | strhs r0, [sp] @ store history[channels-ch-1] |
620 | bhs .fp_channelloop | 620 | bhs .fp_channelloop |
diff --git a/lib/rbcodec/dsp/dsp_arm_v6.S b/lib/rbcodec/dsp/dsp_arm_v6.S index a36760f744..aa27ec90f6 100644 --- a/lib/rbcodec/dsp/dsp_arm_v6.S +++ b/lib/rbcodec/dsp/dsp_arm_v6.S | |||
@@ -18,6 +18,7 @@ | |||
18 | * KIND, either express or implied. | 18 | * KIND, either express or implied. |
19 | * | 19 | * |
20 | ****************************************************************************/ | 20 | ****************************************************************************/ |
21 | #include "rbcodecconfig.h" | ||
21 | 22 | ||
22 | /**************************************************************************** | 23 | /**************************************************************************** |
23 | * void sample_output_mono(struct sample_io_data *this, | 24 | * void sample_output_mono(struct sample_io_data *this, |
@@ -56,7 +57,7 @@ sample_output_mono: | |||
56 | stmia r3!, { r12, r14 } @ store So0, So1 | 57 | stmia r3!, { r12, r14 } @ store So0, So1 |
57 | bgt 1b @ | 58 | bgt 1b @ |
58 | @ | 59 | @ |
59 | ldmltfd sp!, { r4, pc } @ if count was even, we're done | 60 | ldmfdlt sp!, { r4, pc } @ if count was even, we're done |
60 | @ | 61 | @ |
61 | 2: @ | 62 | 2: @ |
62 | ldr r12, [r2] @ round, scale, saturate | 63 | ldr r12, [r2] @ round, scale, saturate |
@@ -113,7 +114,7 @@ sample_output_stereo: | |||
113 | stmia r3!, { r6, r7 } @ store So0, So1 | 114 | stmia r3!, { r6, r7 } @ store So0, So1 |
114 | bgt 1b @ | 115 | bgt 1b @ |
115 | @ | 116 | @ |
116 | ldmltfd sp!, { r4-r7, pc } @ if count was even, we're done | 117 | ldmfdlt sp!, { r4-r7, pc } @ if count was even, we're done |
117 | @ | 118 | @ |
118 | 2: @ | 119 | 2: @ |
119 | ldr r6, [r2] @ r6 = Li | 120 | ldr r6, [r2] @ r6 = Li |