diff options
author | Dave Hooper <dave@beermex.com> | 2009-07-26 19:06:36 +0000 |
---|---|---|
committer | Dave Hooper <dave@beermex.com> | 2009-07-26 19:06:36 +0000 |
commit | a8d1cfdec8f62f976ba03713da07b88bd927fce5 (patch) | |
tree | d9a5177cbab89abdd1e9ae4c0e6820a9187f36c0 /apps/codecs | |
parent | cece75eb42ca7e294fb423ff64c0d664cb374ec6 (diff) | |
download | rockbox-a8d1cfdec8f62f976ba03713da07b88bd927fce5.tar.gz rockbox-a8d1cfdec8f62f976ba03713da07b88bd927fce5.zip |
Approx 10% speedup in cook on files tested: remove some inner loops in favour of memcpy/memset/vect_add calls; remove multiplication from index arithmetic in loops in favour of pointer arithmetic; make use of the MULT31, MULT31_SHIFT15 and CLIP_TO_15 implementations from codeclib instead of cook having its own implementations.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@22055 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs')
-rw-r--r-- | apps/codecs/libcook/cook.c | 79 | ||||
-rw-r--r-- | apps/codecs/libcook/cook_fixpoint.h | 104 |
2 files changed, 84 insertions, 99 deletions
diff --git a/apps/codecs/libcook/cook.c b/apps/codecs/libcook/cook.c index 524f5e1ff8..7ad994926e 100644 --- a/apps/codecs/libcook/cook.c +++ b/apps/codecs/libcook/cook.c | |||
@@ -328,13 +328,8 @@ static void categorize(COOKContext *q, int* quant_index_table, | |||
328 | --exp_index2[index]; | 328 | --exp_index2[index]; |
329 | } | 329 | } |
330 | } | 330 | } |
331 | 331 | memcpy(category, exp_index2, sizeof(int) * q->total_subbands ); | |
332 | for(i=0 ; i<q->total_subbands ; i++) | 332 | memcpy(category_index, tmp_categorize_array+tmp_categorize_array2_idx, sizeof(int) * (q->numvector_size-1) ); |
333 | category[i] = exp_index2[i]; | ||
334 | |||
335 | for(i=0 ; i<q->numvector_size-1 ; i++) | ||
336 | category_index[i] = tmp_categorize_array[tmp_categorize_array2_idx++]; | ||
337 | |||
338 | } | 333 | } |
339 | 334 | ||
340 | 335 | ||
@@ -370,27 +365,38 @@ static int unpack_SQVH(COOKContext *q, int category, int* subband_coef_index, | |||
370 | 365 | ||
371 | vd = vd_tab[category]; | 366 | vd = vd_tab[category]; |
372 | result = 0; | 367 | result = 0; |
373 | for(i=0 ; i<vpr_tab[category] ; i++){ | 368 | for(i=0 ; i<vpr_tab[category] ; i++) |
369 | { | ||
374 | vlc = get_vlc2(&q->gb, q->sqvh[category].table, q->sqvh[category].bits, 3); | 370 | vlc = get_vlc2(&q->gb, q->sqvh[category].table, q->sqvh[category].bits, 3); |
375 | if (q->bits_per_subpacket < get_bits_count(&q->gb)){ | 371 | if (q->bits_per_subpacket < get_bits_count(&q->gb)) |
372 | { | ||
376 | vlc = 0; | 373 | vlc = 0; |
377 | result = 1; | 374 | result = 1; |
375 | memset(subband_coef_index, 0, sizeof(int)*vd); | ||
376 | memset(subband_coef_sign, 0, sizeof(int)*vd); | ||
377 | subband_coef_index+=vd; | ||
378 | subband_coef_sign+=vd; | ||
378 | } | 379 | } |
379 | for(j=vd-1 ; j>=0 ; j--){ | 380 | else |
380 | tmp = (vlc * invradix_tab[category])/0x100000; | 381 | { |
381 | subband_coef_index[vd*i+j] = vlc - tmp * (kmax_tab[category]+1); | 382 | for(j=vd-1 ; j>=0 ; j--){ |
382 | vlc = tmp; | 383 | tmp = (vlc * invradix_tab[category])/0x100000; |
383 | } | 384 | subband_coef_index[j] = vlc - tmp * (kmax_tab[category]+1); |
384 | for(j=0 ; j<vd ; j++){ | 385 | vlc = tmp; |
385 | if (subband_coef_index[i*vd + j]) { | 386 | } |
386 | if(get_bits_count(&q->gb) < q->bits_per_subpacket){ | 387 | |
387 | subband_coef_sign[i*vd+j] = get_bits1(&q->gb); | 388 | for(j=0 ; j<vd ; j++) |
389 | { | ||
390 | if (*subband_coef_index++) { | ||
391 | if(get_bits_count(&q->gb) < q->bits_per_subpacket) { | ||
392 | *subband_coef_sign++ = get_bits1(&q->gb); | ||
393 | } else { | ||
394 | result=1; | ||
395 | *subband_coef_sign++=0; | ||
396 | } | ||
388 | } else { | 397 | } else { |
389 | result=1; | 398 | *subband_coef_sign++=0; |
390 | subband_coef_sign[i*vd+j]=0; | ||
391 | } | 399 | } |
392 | } else { | ||
393 | subband_coef_sign[i*vd+j]=0; | ||
394 | } | 400 | } |
395 | } | 401 | } |
396 | } | 402 | } |
@@ -505,7 +511,7 @@ static void decouple_info(COOKContext *q, int* decouple_tab){ | |||
505 | 511 | ||
506 | static void joint_decode(COOKContext *q, REAL_T* mlt_buffer1, | 512 | static void joint_decode(COOKContext *q, REAL_T* mlt_buffer1, |
507 | REAL_T* mlt_buffer2) { | 513 | REAL_T* mlt_buffer2) { |
508 | int i,j; | 514 | int i; |
509 | int decouple_tab[SUBBAND_SIZE]; | 515 | int decouple_tab[SUBBAND_SIZE]; |
510 | REAL_T *decode_buffer = q->decode_buffer_0; | 516 | REAL_T *decode_buffer = q->decode_buffer_0; |
511 | int idx; | 517 | int idx; |
@@ -520,11 +526,14 @@ static void joint_decode(COOKContext *q, REAL_T* mlt_buffer1, | |||
520 | mono_decode(q, decode_buffer); | 526 | mono_decode(q, decode_buffer); |
521 | 527 | ||
522 | /* The two channels are stored interleaved in decode_buffer. */ | 528 | /* The two channels are stored interleaved in decode_buffer. */ |
523 | for (i=0 ; i<q->js_subband_start ; i++) { | 529 | REAL_T * mlt_buffer1_end = mlt_buffer1 + (q->js_subband_start*SUBBAND_SIZE); |
524 | for (j=0 ; j<SUBBAND_SIZE ; j++) { | 530 | while(mlt_buffer1 < mlt_buffer1_end) |
525 | mlt_buffer1[i*20+j] = decode_buffer[i*40+j]; | 531 | { |
526 | mlt_buffer2[i*20+j] = decode_buffer[i*40+20+j]; | 532 | memcpy(mlt_buffer1,decode_buffer,sizeof(REAL_T)*SUBBAND_SIZE); |
527 | } | 533 | memcpy(mlt_buffer2,decode_buffer+20,sizeof(REAL_T)*SUBBAND_SIZE); |
534 | mlt_buffer1 += 20; | ||
535 | mlt_buffer2 += 20; | ||
536 | decode_buffer += 40; | ||
528 | } | 537 | } |
529 | 538 | ||
530 | /* When we reach js_subband_start (the higher frequencies) | 539 | /* When we reach js_subband_start (the higher frequencies) |
@@ -533,11 +542,15 @@ static void joint_decode(COOKContext *q, REAL_T* mlt_buffer1, | |||
533 | for (i=q->js_subband_start ; i<q->subbands ; i++) { | 542 | for (i=q->js_subband_start ; i<q->subbands ; i++) { |
534 | int i1 = decouple_tab[cplband[i]]; | 543 | int i1 = decouple_tab[cplband[i]]; |
535 | int i2 = idx - i1 - 1; | 544 | int i2 = idx - i1 - 1; |
536 | for (j=0 ; j<SUBBAND_SIZE ; j++) { | 545 | mlt_buffer1_end = mlt_buffer1 + SUBBAND_SIZE; |
537 | REAL_T x = decode_buffer[((q->js_subband_start + i)*20)+j]; | 546 | while(mlt_buffer1 < mlt_buffer1_end) |
538 | mlt_buffer1[20*i+j] = cplscale_math(x, q->js_vlc_bits, i1); | 547 | { |
539 | mlt_buffer2[20*i+j] = cplscale_math(x, q->js_vlc_bits, i2); | 548 | *mlt_buffer1++ = cplscale_math(*decode_buffer, q->js_vlc_bits, i1); |
549 | *mlt_buffer2++ = cplscale_math(*decode_buffer++, q->js_vlc_bits, i2); | ||
540 | } | 550 | } |
551 | mlt_buffer1 += (20-SUBBAND_SIZE); | ||
552 | mlt_buffer2 += (20-SUBBAND_SIZE); | ||
553 | decode_buffer += (20-SUBBAND_SIZE); | ||
541 | } | 554 | } |
542 | } | 555 | } |
543 | 556 | ||
@@ -581,7 +594,7 @@ decode_bytes_and_gain(COOKContext *q, const uint8_t *inbuffer, | |||
581 | * @param chan 0: left or single channel, 1: right channel | 594 | * @param chan 0: left or single channel, 1: right channel |
582 | */ | 595 | */ |
583 | 596 | ||
584 | static inline void | 597 | static void |
585 | mlt_compensate_output(COOKContext *q, REAL_T *decode_buffer, | 598 | mlt_compensate_output(COOKContext *q, REAL_T *decode_buffer, |
586 | cook_gains *gains, REAL_T *previous_buffer, | 599 | cook_gains *gains, REAL_T *previous_buffer, |
587 | int16_t *out, int chan) | 600 | int16_t *out, int chan) |
diff --git a/apps/codecs/libcook/cook_fixpoint.h b/apps/codecs/libcook/cook_fixpoint.h index 32d8a81cc2..f92d717f20 100644 --- a/apps/codecs/libcook/cook_fixpoint.h +++ b/apps/codecs/libcook/cook_fixpoint.h | |||
@@ -35,8 +35,13 @@ | |||
35 | * in C using two 32 bit integer multiplications. | 35 | * in C using two 32 bit integer multiplications. |
36 | */ | 36 | */ |
37 | 37 | ||
38 | /* get definitions of MULT31, MULT31_SHIFT15, CLIP_TO_15, vect_add, from codelib */ | ||
39 | #include "asm_arm.h" | ||
40 | #include "asm_mcf5249.h" | ||
41 | #include "codeclib_misc.h" | ||
42 | |||
38 | /* The following table is taken from libavutil/mathematics.c */ | 43 | /* The following table is taken from libavutil/mathematics.c */ |
39 | const uint8_t ff_log2_tab[256]={ | 44 | const uint8_t ff_log2_tab[256] ={ |
40 | 0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, | 45 | 0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, |
41 | 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, | 46 | 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, |
42 | 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, | 47 | 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, |
@@ -67,6 +72,11 @@ static inline FIXP fixp_pow2(FIXP x, int i) | |||
67 | return x << i; /* no check for overflow */ | 72 | return x << i; /* no check for overflow */ |
68 | } | 73 | } |
69 | 74 | ||
75 | static inline FIXP fixp_pow2_neg(FIXP x, int i) | ||
76 | { | ||
77 | return (x >> i) + ((x >> (i-1)) & 1); | ||
78 | } | ||
79 | |||
70 | /** | 80 | /** |
71 | * Fixed point multiply by fraction. | 81 | * Fixed point multiply by fraction. |
72 | * | 82 | * |
@@ -74,53 +84,10 @@ static inline FIXP fixp_pow2(FIXP x, int i) | |||
74 | * @param b fix point fraction, 0 <= b < 1 | 84 | * @param b fix point fraction, 0 <= b < 1 |
75 | */ | 85 | */ |
76 | 86 | ||
77 | static inline FIXP fixp_mult_su(FIXP a, FIXPU b) | 87 | #define fixp_mult_su(x,y) (MULT31_SHIFT15(x,y)) |
78 | { | ||
79 | |||
80 | int32_t hb = (a >> 16) * b; | ||
81 | uint32_t lb = (a & 0xffff) * b; | ||
82 | |||
83 | return hb + (lb >> 16) + ((lb & 0x8000) >> 15); | ||
84 | } | ||
85 | 88 | ||
86 | /* Faster version of the above using 32x32=64 bit multiply */ | 89 | /* Faster version of the above using 32x32=64 bit multiply */ |
87 | #ifdef CPU_ARM | 90 | #define fixmul31(x,y) (MULT31(x,y)) |
88 | #define fixmul31(x, y) \ | ||
89 | ({ int32_t __hi; \ | ||
90 | uint32_t __lo; \ | ||
91 | int32_t __result; \ | ||
92 | asm ("smull %0, %1, %3, %4\n\t" \ | ||
93 | "movs %2, %1, lsl #1" \ | ||
94 | : "=&r" (__lo), "=&r" (__hi), "=r" (__result) \ | ||
95 | : "%r" (x), "r" (y) \ | ||
96 | : "cc"); \ | ||
97 | __result; \ | ||
98 | }) | ||
99 | |||
100 | #elif defined(CPU_COLDFIRE) | ||
101 | static inline int32_t fixmul31(int32_t x, int32_t y) | ||
102 | { | ||
103 | asm ( | ||
104 | "mac.l %[x], %[y], %%acc0 \n" /* multiply */ | ||
105 | "movclr.l %%acc0, %[x] \n" /* get higher half */ | ||
106 | : [x] "+d" (x) | ||
107 | : [y] "d" (y) | ||
108 | ); | ||
109 | return x; | ||
110 | } | ||
111 | #else | ||
112 | static inline int32_t fixmul31(int32_t x, int32_t y) | ||
113 | { | ||
114 | int64_t temp; | ||
115 | |||
116 | temp = x; | ||
117 | temp *= y; | ||
118 | |||
119 | temp >>= 31; //16+31-16 = 31 bits | ||
120 | |||
121 | return (int32_t)temp; | ||
122 | } | ||
123 | #endif | ||
124 | 91 | ||
125 | /* math functions taken from libavutil/common.h */ | 92 | /* math functions taken from libavutil/common.h */ |
126 | 93 | ||
@@ -169,13 +136,13 @@ static void scalar_dequant_math(COOKContext *q, int index, | |||
169 | int* subband_coef_sign, REAL_T *mlt_p) | 136 | int* subband_coef_sign, REAL_T *mlt_p) |
170 | { | 137 | { |
171 | /* Num. half bits to right shift */ | 138 | /* Num. half bits to right shift */ |
172 | const int s = 33 - quant_index + av_log2(q->samples_per_channel); | 139 | const int s = (33 - quant_index + av_log2(q->samples_per_channel)) >> 1; |
173 | const FIXP *table = quant_tables[s & 1][index]; | 140 | const FIXP *table = quant_tables[s & 1][index]; |
174 | FIXP f; | 141 | FIXP f; |
175 | int i; | 142 | int i; |
176 | 143 | ||
177 | 144 | ||
178 | if(s >= 64) | 145 | if(s >= 32) |
179 | memset(mlt_p, 0, sizeof(REAL_T)*SUBBAND_SIZE); | 146 | memset(mlt_p, 0, sizeof(REAL_T)*SUBBAND_SIZE); |
180 | else | 147 | else |
181 | { | 148 | { |
@@ -186,7 +153,7 @@ static void scalar_dequant_math(COOKContext *q, int index, | |||
186 | ((subband_coef_index[i] != 0) && subband_coef_sign[i])) | 153 | ((subband_coef_index[i] != 0) && subband_coef_sign[i])) |
187 | f = -f; | 154 | f = -f; |
188 | 155 | ||
189 | mlt_p[i] =fixp_pow2(f, -(s/2)); | 156 | *mlt_p++ = fixp_pow2_neg(f, s); |
190 | } | 157 | } |
191 | } | 158 | } |
192 | } | 159 | } |
@@ -274,10 +241,9 @@ static inline void imlt_math(COOKContext *q, FIXP *in) | |||
274 | static inline void overlap_math(COOKContext *q, int gain, FIXP buffer[]) | 241 | static inline void overlap_math(COOKContext *q, int gain, FIXP buffer[]) |
275 | { | 242 | { |
276 | int i; | 243 | int i; |
277 | if(LIKELY(gain == 0)){ | 244 | if(LIKELY(gain == 0)) |
278 | for(i=0 ; i<q->samples_per_channel ; i++) { | 245 | { |
279 | q->mono_mdct_output[i] += buffer[i]; | 246 | vect_add(q->mono_mdct_output, buffer, q->samples_per_channel); |
280 | } | ||
281 | 247 | ||
282 | } else if (gain > 0){ | 248 | } else if (gain > 0){ |
283 | for(i=0 ; i<q->samples_per_channel ; i++) { | 249 | for(i=0 ; i<q->samples_per_channel ; i++) { |
@@ -301,7 +267,7 @@ static inline void overlap_math(COOKContext *q, int gain, FIXP buffer[]) | |||
301 | * @param gain_index_next index for the next block multiplier | 267 | * @param gain_index_next index for the next block multiplier |
302 | */ | 268 | */ |
303 | static inline void | 269 | static inline void |
304 | interpolate_math(COOKContext *q, FIXP* buffer, | 270 | interpolate_math(COOKContext *q, register FIXP* buffer, |
305 | int gain_index, int gain_index_next) | 271 | int gain_index, int gain_index_next) |
306 | { | 272 | { |
307 | int i; | 273 | int i; |
@@ -315,14 +281,17 @@ interpolate_math(COOKContext *q, FIXP* buffer, | |||
315 | int step = (gain_index_next - gain_index) | 281 | int step = (gain_index_next - gain_index) |
316 | << (7 - av_log2(gain_size_factor)); | 282 | << (7 - av_log2(gain_size_factor)); |
317 | int x = 0; | 283 | int x = 0; |
318 | 284 | register FIXP* bufferend = buffer+gain_size_factor; | |
319 | for(i = 0; i < gain_size_factor; i++) { | 285 | while(buffer < bufferend ) |
320 | buffer[i] = fixp_mult_su(buffer[i], pow128_tab[x]); | 286 | { |
321 | buffer[i] = fixp_pow2(buffer[i], gain_index+1); | 287 | *buffer = fixp_pow2( |
288 | fixp_mult_su(*buffer, pow128_tab[x]), | ||
289 | gain_index+1); | ||
290 | buffer++; | ||
322 | 291 | ||
323 | x += step; | 292 | x += step; |
324 | gain_index += (x + 128) / 128 - 1; | 293 | gain_index += ( (x + 128) >> 7 ) - 1; |
325 | x = (x + 128) % 128; | 294 | x = ( (x + 128) & 127 ); |
326 | } | 295 | } |
327 | } | 296 | } |
328 | } | 297 | } |
@@ -349,12 +318,15 @@ static inline FIXP cplscale_math(FIXP x, int table, int i) | |||
349 | * @param out pointer to the output buffer | 318 | * @param out pointer to the output buffer |
350 | * @param chan 0: left or single channel, 1: right channel | 319 | * @param chan 0: left or single channel, 1: right channel |
351 | */ | 320 | */ |
352 | static inline void output_math(COOKContext *q, int16_t *out, int chan) | 321 | static inline void output_math(COOKContext *q, register int16_t *out, int chan) |
353 | { | 322 | { |
354 | int j; | 323 | register REAL_T * mono_output_ptr = q->mono_mdct_output; |
355 | 324 | register REAL_T * mono_output_end = mono_output_ptr + q->samples_per_channel; | |
356 | for (j = 0; j < q->samples_per_channel; j++) { | 325 | out += chan; |
357 | out[chan + q->nb_channels * j] = | 326 | const int STEP = q->nb_channels; |
358 | av_clip(fixp_pow2(q->mono_mdct_output[j], -11), -32768, 32767); | 327 | while( mono_output_ptr < mono_output_end ) |
328 | { | ||
329 | *out = CLIP_TO_15(fixp_pow2_neg(*mono_output_ptr++, 11)); | ||
330 | out += STEP; | ||
359 | } | 331 | } |
360 | } | 332 | } |