diff options
Diffstat (limited to 'apps/codecs/lib')
-rw-r--r-- | apps/codecs/lib/asm_arm.h | 142 |
1 files changed, 86 insertions, 56 deletions
diff --git a/apps/codecs/lib/asm_arm.h b/apps/codecs/lib/asm_arm.h index 627f4afd78..c0f9440450 100644 --- a/apps/codecs/lib/asm_arm.h +++ b/apps/codecs/lib/asm_arm.h | |||
@@ -19,20 +19,30 @@ | |||
19 | #if !defined(_V_WIDE_MATH) && !defined(_LOW_ACCURACY_) | 19 | #if !defined(_V_WIDE_MATH) && !defined(_LOW_ACCURACY_) |
20 | #define _V_WIDE_MATH | 20 | #define _V_WIDE_MATH |
21 | 21 | ||
22 | #if ARM_ARCH >= 6 | ||
22 | static inline int32_t MULT32(int32_t x, int32_t y) { | 23 | static inline int32_t MULT32(int32_t x, int32_t y) { |
23 | int lo,hi; | 24 | int32_t hi; |
24 | asm volatile("smull\t%0, %1, %2, %3" | 25 | asm volatile("smmul %[hi], %[x], %[y] \n\t" |
26 | : [hi] "=&r" (hi) | ||
27 | : [x] "r" (x), [y] "r" (y) ); | ||
28 | return(hi); | ||
29 | } | ||
30 | #else | ||
31 | static inline int32_t MULT32(int32_t x, int32_t y) { | ||
32 | int32_t lo, hi; | ||
33 | asm volatile("smull\t%0, %1, %2, %3 \n\t" | ||
25 | : "=&r"(lo),"=&r"(hi) | 34 | : "=&r"(lo),"=&r"(hi) |
26 | : "r"(x),"r"(y) ); | 35 | : "r"(x),"r"(y) ); |
27 | return(hi); | 36 | return(hi); |
28 | } | 37 | } |
38 | #endif | ||
29 | 39 | ||
30 | static inline int32_t MULT31(int32_t x, int32_t y) { | 40 | static inline int32_t MULT31(int32_t x, int32_t y) { |
31 | return MULT32(x,y)<<1; | 41 | return MULT32(x,y)<<1; |
32 | } | 42 | } |
33 | 43 | ||
34 | static inline int32_t MULT31_SHIFT15(int32_t x, int32_t y) { | 44 | static inline int32_t MULT31_SHIFT15(int32_t x, int32_t y) { |
35 | int lo,hi; | 45 | int32_t lo,hi; |
36 | asm volatile("smull %0, %1, %2, %3\n\t" | 46 | asm volatile("smull %0, %1, %2, %3\n\t" |
37 | "movs %0, %0, lsr #15\n\t" | 47 | "movs %0, %0, lsr #15\n\t" |
38 | "adc %1, %0, %1, lsl #17\n\t" | 48 | "adc %1, %0, %1, lsl #17\n\t" |
@@ -44,75 +54,95 @@ static inline int32_t MULT31_SHIFT15(int32_t x, int32_t y) { | |||
44 | 54 | ||
45 | #define XPROD32(a, b, t, v, x, y) \ | 55 | #define XPROD32(a, b, t, v, x, y) \ |
46 | { \ | 56 | { \ |
47 | long l; \ | 57 | int32_t l; \ |
48 | asm( "smull %0, %1, %3, %5\n\t" \ | 58 | asm("smull %0, %1, %3, %5\n\t" \ |
49 | "rsb %2, %6, #0\n\t" \ | 59 | "rsb %2, %6, #0\n\t" \ |
50 | "smlal %0, %1, %4, %6\n\t" \ | 60 | "smlal %0, %1, %4, %6\n\t" \ |
51 | "smull %0, %2, %3, %2\n\t" \ | 61 | "smull %0, %2, %3, %2\n\t" \ |
52 | "smlal %0, %2, %4, %5" \ | 62 | "smlal %0, %2, %4, %5" \ |
53 | : "=&r" (l), "=&r" (x), "=&r" (y) \ | 63 | : "=&r" (l), "=&r" (x), "=&r" (y) \ |
54 | : "r" ((a)), "r" ((b)), "r" ((t)), "r" ((v)) ); \ | 64 | : "r" ((a)), "r" ((b)), "r" ((t)), "r" ((v)) ); \ |
55 | } | 65 | } |
56 | 66 | ||
57 | static inline void XPROD31(int32_t a, int32_t b, | 67 | #if ARM_ARCH >= 6 |
58 | int32_t t, int32_t v, | 68 | /* These may yield slightly different result from the macros below |
59 | int32_t *x, int32_t *y) | 69 | because only the high 32 bits of the multiplications are accumulated while |
60 | { | 70 | the below macros use a 64 bit accumulator that is truncated to 32 bits.*/ |
61 | int x1, y1, l; | 71 | #define XPROD31_R(_a, _b, _t, _v, _x, _y)\ |
62 | asm( "smull %0, %1, %3, %5\n\t" | 72 | {\ |
63 | "rsb %2, %6, #0\n\t" | 73 | int32_t x1, y1;\ |
64 | "smlal %0, %1, %4, %6\n\t" | 74 | asm("smmul %[x1], %[t], %[a] \n\t"\ |
65 | "smull %0, %2, %3, %2\n\t" | 75 | "smmul %[y1], %[t], %[b] \n\t"\ |
66 | "smlal %0, %2, %4, %5" | 76 | "smmla %[x1], %[v], %[b], %[x1] \n\t"\ |
67 | : "=&r" (l), "=&r" (x1), "=&r" (y1) | 77 | "smmls %[y1], %[v], %[a], %[y1] \n\t"\ |
68 | : "r" (a), "r" (b), "r" (t), "r" (v) ); | 78 | : [x1] "=&r" (x1), [y1] "=&r" (y1)\ |
69 | *x = x1 << 1; | 79 | : [a] "r" (_a), [b] "r" (_b), [t] "r" (_t), [v] "r" (_v) );\ |
70 | *y = y1 << 1; | 80 | _x = x1 << 1;\ |
81 | _y = y1 << 1;\ | ||
71 | } | 82 | } |
72 | 83 | ||
73 | static inline void XNPROD31(int32_t a, int32_t b, | 84 | #define XNPROD31_R(_a, _b, _t, _v, _x, _y)\ |
74 | int32_t t, int32_t v, | 85 | {\ |
75 | int32_t *x, int32_t *y) | 86 | int32_t x1, y1;\ |
76 | { | 87 | asm("smmul %[x1], %[t], %[a] \n\t"\ |
77 | int x1, y1, l; | 88 | "smmul %[y1], %[t], %[b] \n\t"\ |
78 | asm( "smull %0, %1, %3, %5\n\t" | 89 | "smmls %[x1], %[v], %[b], %[x1] \n\t"\ |
79 | "rsb %2, %4, #0\n\t" | 90 | "smmla %[y1], %[v], %[a], %[y1] \n\t"\ |
80 | "smlal %0, %1, %2, %6\n\t" | 91 | : [x1] "=&r" (x1), [y1] "=&r" (y1)\ |
81 | "smull %0, %2, %4, %5\n\t" | 92 | : [a] "r" (_a), [b] "r" (_b), [t] "r" (_t), [v] "r" (_v) );\ |
82 | "smlal %0, %2, %3, %6" | 93 | _x = x1 << 1;\ |
83 | : "=&r" (l), "=&r" (x1), "=&r" (y1) | 94 | _y = y1 << 1;\ |
84 | : "r" (a), "r" (b), "r" (t), "r" (v) ); | ||
85 | *x = x1 << 1; | ||
86 | *y = y1 << 1; | ||
87 | } | 95 | } |
88 | 96 | #else | |
89 | #define XPROD31_R(_a, _b, _t, _v, _x, _y)\ | 97 | #define XPROD31_R(_a, _b, _t, _v, _x, _y)\ |
90 | {\ | 98 | {\ |
91 | int x1, y1, l;\ | 99 | int32_t x1, y1, l;\ |
92 | asm( "smull %0, %1, %5, %3\n\t"\ | 100 | asm("smull %0, %1, %5, %3\n\t"\ |
93 | "rsb %2, %3, #0\n\t"\ | 101 | "rsb %2, %3, #0\n\t"\ |
94 | "smlal %0, %1, %6, %4\n\t"\ | 102 | "smlal %0, %1, %6, %4\n\t"\ |
95 | "smull %0, %2, %6, %2\n\t"\ | 103 | "smull %0, %2, %6, %2\n\t"\ |
96 | "smlal %0, %2, %5, %4"\ | 104 | "smlal %0, %2, %5, %4"\ |
97 | : "=&r" (l), "=&r" (x1), "=&r" (y1)\ | 105 | : "=&r" (l), "=&r" (x1), "=&r" (y1)\ |
98 | : "r" (_a), "r" (_b), "r" (_t), "r" (_v) );\ | 106 | : "r" (_a), "r" (_b), "r" (_t), "r" (_v) );\ |
99 | _x = x1 << 1;\ | 107 | _x = x1 << 1;\ |
100 | _y = y1 << 1;\ | 108 | _y = y1 << 1;\ |
101 | } | 109 | } |
102 | 110 | ||
103 | #define XNPROD31_R(_a, _b, _t, _v, _x, _y)\ | 111 | #define XNPROD31_R(_a, _b, _t, _v, _x, _y)\ |
104 | {\ | 112 | {\ |
105 | int x1, y1, l;\ | 113 | int32_t x1, y1, l;\ |
106 | asm( "smull %0, %1, %5, %3\n\t"\ | 114 | asm("smull %0, %1, %5, %3\n\t"\ |
107 | "rsb %2, %4, #0\n\t"\ | 115 | "rsb %2, %4, #0\n\t"\ |
108 | "smlal %0, %1, %6, %2\n\t"\ | 116 | "smlal %0, %1, %6, %2\n\t"\ |
109 | "smull %0, %2, %5, %4\n\t"\ | 117 | "smull %0, %2, %5, %4\n\t"\ |
110 | "smlal %0, %2, %6, %3"\ | 118 | "smlal %0, %2, %6, %3"\ |
111 | : "=&r" (l), "=&r" (x1), "=&r" (y1)\ | 119 | : "=&r" (l), "=&r" (x1), "=&r" (y1)\ |
112 | : "r" (_a), "r" (_b), "r" (_t), "r" (_v) );\ | 120 | : "r" (_a), "r" (_b), "r" (_t), "r" (_v) );\ |
113 | _x = x1 << 1;\ | 121 | _x = x1 << 1;\ |
114 | _y = y1 << 1;\ | 122 | _y = y1 << 1;\ |
115 | } | 123 | } |
124 | #endif | ||
125 | |||
126 | static inline void XPROD31(int32_t a, int32_t b, | ||
127 | int32_t t, int32_t v, | ||
128 | int32_t *x, int32_t *y) | ||
129 | { | ||
130 | int32_t _x1, _y1; | ||
131 | XPROD31_R(a, b, t, v, _x1, _y1); | ||
132 | *x = _x1; | ||
133 | *y = _y1; | ||
134 | } | ||
135 | |||
136 | static inline void XNPROD31(int32_t a, int32_t b, | ||
137 | int32_t t, int32_t v, | ||
138 | int32_t *x, int32_t *y) | ||
139 | { | ||
140 | int32_t _x1, _y1; | ||
141 | XNPROD31_R(a, b, t, v, _x1, _y1); | ||
142 | *x = _x1; | ||
143 | *y = _y1; | ||
144 | } | ||
145 | |||
116 | 146 | ||
117 | #ifndef _V_VECT_OPS | 147 | #ifndef _V_VECT_OPS |
118 | #define _V_VECT_OPS | 148 | #define _V_VECT_OPS |