summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Dave Hooper <dave@beermex.com>, 2009-07-26 19:06:36 +0000
committer: Dave Hooper <dave@beermex.com>, 2009-07-26 19:06:36 +0000
commit: a8d1cfdec8f62f976ba03713da07b88bd927fce5 (patch)
tree: d9a5177cbab89abdd1e9ae4c0e6820a9187f36c0
parent: cece75eb42ca7e294fb423ff64c0d664cb374ec6 (diff)
download: rockbox-a8d1cfdec8f62f976ba03713da07b88bd927fce5.tar.gz
download: rockbox-a8d1cfdec8f62f976ba03713da07b88bd927fce5.zip
Approximately 10% speedup in the cook decoder on the files tested: replace some inner loops with memcpy/memset/vect_add calls; replace multiplication in loop index arithmetic with pointer arithmetic; use the MULT31, MULT31_SHIFT15 and CLIP_TO_15 implementations from codeclib instead of cook keeping its own implementations.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@22055 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- apps/codecs/libcook/cook.c 79
-rw-r--r-- apps/codecs/libcook/cook_fixpoint.h 104
2 files changed, 84 insertions, 99 deletions
diff --git a/apps/codecs/libcook/cook.c b/apps/codecs/libcook/cook.c
index 524f5e1ff8..7ad994926e 100644
--- a/apps/codecs/libcook/cook.c
+++ b/apps/codecs/libcook/cook.c
@@ -328,13 +328,8 @@ static void categorize(COOKContext *q, int* quant_index_table,
328 --exp_index2[index]; 328 --exp_index2[index];
329 } 329 }
330 } 330 }
331 331 memcpy(category, exp_index2, sizeof(int) * q->total_subbands );
332 for(i=0 ; i<q->total_subbands ; i++) 332 memcpy(category_index, tmp_categorize_array+tmp_categorize_array2_idx, sizeof(int) * (q->numvector_size-1) );
333 category[i] = exp_index2[i];
334
335 for(i=0 ; i<q->numvector_size-1 ; i++)
336 category_index[i] = tmp_categorize_array[tmp_categorize_array2_idx++];
337
338} 333}
339 334
340 335
@@ -370,27 +365,38 @@ static int unpack_SQVH(COOKContext *q, int category, int* subband_coef_index,
370 365
371 vd = vd_tab[category]; 366 vd = vd_tab[category];
372 result = 0; 367 result = 0;
373 for(i=0 ; i<vpr_tab[category] ; i++){ 368 for(i=0 ; i<vpr_tab[category] ; i++)
369 {
374 vlc = get_vlc2(&q->gb, q->sqvh[category].table, q->sqvh[category].bits, 3); 370 vlc = get_vlc2(&q->gb, q->sqvh[category].table, q->sqvh[category].bits, 3);
375 if (q->bits_per_subpacket < get_bits_count(&q->gb)){ 371 if (q->bits_per_subpacket < get_bits_count(&q->gb))
372 {
376 vlc = 0; 373 vlc = 0;
377 result = 1; 374 result = 1;
375 memset(subband_coef_index, 0, sizeof(int)*vd);
376 memset(subband_coef_sign, 0, sizeof(int)*vd);
377 subband_coef_index+=vd;
378 subband_coef_sign+=vd;
378 } 379 }
379 for(j=vd-1 ; j>=0 ; j--){ 380 else
380 tmp = (vlc * invradix_tab[category])/0x100000; 381 {
381 subband_coef_index[vd*i+j] = vlc - tmp * (kmax_tab[category]+1); 382 for(j=vd-1 ; j>=0 ; j--){
382 vlc = tmp; 383 tmp = (vlc * invradix_tab[category])/0x100000;
383 } 384 subband_coef_index[j] = vlc - tmp * (kmax_tab[category]+1);
384 for(j=0 ; j<vd ; j++){ 385 vlc = tmp;
385 if (subband_coef_index[i*vd + j]) { 386 }
386 if(get_bits_count(&q->gb) < q->bits_per_subpacket){ 387
387 subband_coef_sign[i*vd+j] = get_bits1(&q->gb); 388 for(j=0 ; j<vd ; j++)
389 {
390 if (*subband_coef_index++) {
391 if(get_bits_count(&q->gb) < q->bits_per_subpacket) {
392 *subband_coef_sign++ = get_bits1(&q->gb);
393 } else {
394 result=1;
395 *subband_coef_sign++=0;
396 }
388 } else { 397 } else {
389 result=1; 398 *subband_coef_sign++=0;
390 subband_coef_sign[i*vd+j]=0;
391 } 399 }
392 } else {
393 subband_coef_sign[i*vd+j]=0;
394 } 400 }
395 } 401 }
396 } 402 }
@@ -505,7 +511,7 @@ static void decouple_info(COOKContext *q, int* decouple_tab){
505 511
506static void joint_decode(COOKContext *q, REAL_T* mlt_buffer1, 512static void joint_decode(COOKContext *q, REAL_T* mlt_buffer1,
507 REAL_T* mlt_buffer2) { 513 REAL_T* mlt_buffer2) {
508 int i,j; 514 int i;
509 int decouple_tab[SUBBAND_SIZE]; 515 int decouple_tab[SUBBAND_SIZE];
510 REAL_T *decode_buffer = q->decode_buffer_0; 516 REAL_T *decode_buffer = q->decode_buffer_0;
511 int idx; 517 int idx;
@@ -520,11 +526,14 @@ static void joint_decode(COOKContext *q, REAL_T* mlt_buffer1,
520 mono_decode(q, decode_buffer); 526 mono_decode(q, decode_buffer);
521 527
522 /* The two channels are stored interleaved in decode_buffer. */ 528 /* The two channels are stored interleaved in decode_buffer. */
523 for (i=0 ; i<q->js_subband_start ; i++) { 529 REAL_T * mlt_buffer1_end = mlt_buffer1 + (q->js_subband_start*SUBBAND_SIZE);
524 for (j=0 ; j<SUBBAND_SIZE ; j++) { 530 while(mlt_buffer1 < mlt_buffer1_end)
525 mlt_buffer1[i*20+j] = decode_buffer[i*40+j]; 531 {
526 mlt_buffer2[i*20+j] = decode_buffer[i*40+20+j]; 532 memcpy(mlt_buffer1,decode_buffer,sizeof(REAL_T)*SUBBAND_SIZE);
527 } 533 memcpy(mlt_buffer2,decode_buffer+20,sizeof(REAL_T)*SUBBAND_SIZE);
534 mlt_buffer1 += 20;
535 mlt_buffer2 += 20;
536 decode_buffer += 40;
528 } 537 }
529 538
530 /* When we reach js_subband_start (the higher frequencies) 539 /* When we reach js_subband_start (the higher frequencies)
@@ -533,11 +542,15 @@ static void joint_decode(COOKContext *q, REAL_T* mlt_buffer1,
533 for (i=q->js_subband_start ; i<q->subbands ; i++) { 542 for (i=q->js_subband_start ; i<q->subbands ; i++) {
534 int i1 = decouple_tab[cplband[i]]; 543 int i1 = decouple_tab[cplband[i]];
535 int i2 = idx - i1 - 1; 544 int i2 = idx - i1 - 1;
536 for (j=0 ; j<SUBBAND_SIZE ; j++) { 545 mlt_buffer1_end = mlt_buffer1 + SUBBAND_SIZE;
537 REAL_T x = decode_buffer[((q->js_subband_start + i)*20)+j]; 546 while(mlt_buffer1 < mlt_buffer1_end)
538 mlt_buffer1[20*i+j] = cplscale_math(x, q->js_vlc_bits, i1); 547 {
539 mlt_buffer2[20*i+j] = cplscale_math(x, q->js_vlc_bits, i2); 548 *mlt_buffer1++ = cplscale_math(*decode_buffer, q->js_vlc_bits, i1);
549 *mlt_buffer2++ = cplscale_math(*decode_buffer++, q->js_vlc_bits, i2);
540 } 550 }
551 mlt_buffer1 += (20-SUBBAND_SIZE);
552 mlt_buffer2 += (20-SUBBAND_SIZE);
553 decode_buffer += (20-SUBBAND_SIZE);
541 } 554 }
542} 555}
543 556
@@ -581,7 +594,7 @@ decode_bytes_and_gain(COOKContext *q, const uint8_t *inbuffer,
581 * @param chan 0: left or single channel, 1: right channel 594 * @param chan 0: left or single channel, 1: right channel
582 */ 595 */
583 596
584static inline void 597static void
585mlt_compensate_output(COOKContext *q, REAL_T *decode_buffer, 598mlt_compensate_output(COOKContext *q, REAL_T *decode_buffer,
586 cook_gains *gains, REAL_T *previous_buffer, 599 cook_gains *gains, REAL_T *previous_buffer,
587 int16_t *out, int chan) 600 int16_t *out, int chan)
diff --git a/apps/codecs/libcook/cook_fixpoint.h b/apps/codecs/libcook/cook_fixpoint.h
index 32d8a81cc2..f92d717f20 100644
--- a/apps/codecs/libcook/cook_fixpoint.h
+++ b/apps/codecs/libcook/cook_fixpoint.h
@@ -35,8 +35,13 @@
35 * in C using two 32 bit integer multiplications. 35 * in C using two 32 bit integer multiplications.
36 */ 36 */
37 37
38/* get definitions of MULT31, MULT31_SHIFT15, CLIP_TO_15, vect_add, from codelib */
39#include "asm_arm.h"
40#include "asm_mcf5249.h"
41#include "codeclib_misc.h"
42
38/* The following table is taken from libavutil/mathematics.c */ 43/* The following table is taken from libavutil/mathematics.c */
39const uint8_t ff_log2_tab[256]={ 44const uint8_t ff_log2_tab[256] ={
40 0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, 45 0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
41 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, 46 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
42 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, 47 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
@@ -67,6 +72,11 @@ static inline FIXP fixp_pow2(FIXP x, int i)
67 return x << i; /* no check for overflow */ 72 return x << i; /* no check for overflow */
68} 73}
69 74
75static inline FIXP fixp_pow2_neg(FIXP x, int i)
76{
77 return (x >> i) + ((x >> (i-1)) & 1);
78}
79
70/** 80/**
71 * Fixed point multiply by fraction. 81 * Fixed point multiply by fraction.
72 * 82 *
@@ -74,53 +84,10 @@ static inline FIXP fixp_pow2(FIXP x, int i)
74 * @param b fix point fraction, 0 <= b < 1 84 * @param b fix point fraction, 0 <= b < 1
75 */ 85 */
76 86
77static inline FIXP fixp_mult_su(FIXP a, FIXPU b) 87#define fixp_mult_su(x,y) (MULT31_SHIFT15(x,y))
78{
79
80 int32_t hb = (a >> 16) * b;
81 uint32_t lb = (a & 0xffff) * b;
82
83 return hb + (lb >> 16) + ((lb & 0x8000) >> 15);
84}
85 88
86/* Faster version of the above using 32x32=64 bit multiply */ 89/* Faster version of the above using 32x32=64 bit multiply */
87#ifdef CPU_ARM 90#define fixmul31(x,y) (MULT31(x,y))
88#define fixmul31(x, y) \
89 ({ int32_t __hi; \
90 uint32_t __lo; \
91 int32_t __result; \
92 asm ("smull %0, %1, %3, %4\n\t" \
93 "movs %2, %1, lsl #1" \
94 : "=&r" (__lo), "=&r" (__hi), "=r" (__result) \
95 : "%r" (x), "r" (y) \
96 : "cc"); \
97 __result; \
98 })
99
100#elif defined(CPU_COLDFIRE)
101static inline int32_t fixmul31(int32_t x, int32_t y)
102{
103 asm (
104 "mac.l %[x], %[y], %%acc0 \n" /* multiply */
105 "movclr.l %%acc0, %[x] \n" /* get higher half */
106 : [x] "+d" (x)
107 : [y] "d" (y)
108 );
109 return x;
110}
111#else
112static inline int32_t fixmul31(int32_t x, int32_t y)
113{
114 int64_t temp;
115
116 temp = x;
117 temp *= y;
118
119 temp >>= 31; //16+31-16 = 31 bits
120
121 return (int32_t)temp;
122}
123#endif
124 91
125/* math functions taken from libavutil/common.h */ 92/* math functions taken from libavutil/common.h */
126 93
@@ -169,13 +136,13 @@ static void scalar_dequant_math(COOKContext *q, int index,
169 int* subband_coef_sign, REAL_T *mlt_p) 136 int* subband_coef_sign, REAL_T *mlt_p)
170{ 137{
171 /* Num. half bits to right shift */ 138 /* Num. half bits to right shift */
172 const int s = 33 - quant_index + av_log2(q->samples_per_channel); 139 const int s = (33 - quant_index + av_log2(q->samples_per_channel)) >> 1;
173 const FIXP *table = quant_tables[s & 1][index]; 140 const FIXP *table = quant_tables[s & 1][index];
174 FIXP f; 141 FIXP f;
175 int i; 142 int i;
176 143
177 144
178 if(s >= 64) 145 if(s >= 32)
179 memset(mlt_p, 0, sizeof(REAL_T)*SUBBAND_SIZE); 146 memset(mlt_p, 0, sizeof(REAL_T)*SUBBAND_SIZE);
180 else 147 else
181 { 148 {
@@ -186,7 +153,7 @@ static void scalar_dequant_math(COOKContext *q, int index,
186 ((subband_coef_index[i] != 0) && subband_coef_sign[i])) 153 ((subband_coef_index[i] != 0) && subband_coef_sign[i]))
187 f = -f; 154 f = -f;
188 155
189 mlt_p[i] =fixp_pow2(f, -(s/2)); 156 *mlt_p++ = fixp_pow2_neg(f, s);
190 } 157 }
191 } 158 }
192} 159}
@@ -274,10 +241,9 @@ static inline void imlt_math(COOKContext *q, FIXP *in)
274static inline void overlap_math(COOKContext *q, int gain, FIXP buffer[]) 241static inline void overlap_math(COOKContext *q, int gain, FIXP buffer[])
275{ 242{
276 int i; 243 int i;
277 if(LIKELY(gain == 0)){ 244 if(LIKELY(gain == 0))
278 for(i=0 ; i<q->samples_per_channel ; i++) { 245 {
279 q->mono_mdct_output[i] += buffer[i]; 246 vect_add(q->mono_mdct_output, buffer, q->samples_per_channel);
280 }
281 247
282 } else if (gain > 0){ 248 } else if (gain > 0){
283 for(i=0 ; i<q->samples_per_channel ; i++) { 249 for(i=0 ; i<q->samples_per_channel ; i++) {
@@ -301,7 +267,7 @@ static inline void overlap_math(COOKContext *q, int gain, FIXP buffer[])
301 * @param gain_index_next index for the next block multiplier 267 * @param gain_index_next index for the next block multiplier
302 */ 268 */
303static inline void 269static inline void
304interpolate_math(COOKContext *q, FIXP* buffer, 270interpolate_math(COOKContext *q, register FIXP* buffer,
305 int gain_index, int gain_index_next) 271 int gain_index, int gain_index_next)
306{ 272{
307 int i; 273 int i;
@@ -315,14 +281,17 @@ interpolate_math(COOKContext *q, FIXP* buffer,
315 int step = (gain_index_next - gain_index) 281 int step = (gain_index_next - gain_index)
316 << (7 - av_log2(gain_size_factor)); 282 << (7 - av_log2(gain_size_factor));
317 int x = 0; 283 int x = 0;
318 284 register FIXP* bufferend = buffer+gain_size_factor;
319 for(i = 0; i < gain_size_factor; i++) { 285 while(buffer < bufferend )
320 buffer[i] = fixp_mult_su(buffer[i], pow128_tab[x]); 286 {
321 buffer[i] = fixp_pow2(buffer[i], gain_index+1); 287 *buffer = fixp_pow2(
288 fixp_mult_su(*buffer, pow128_tab[x]),
289 gain_index+1);
290 buffer++;
322 291
323 x += step; 292 x += step;
324 gain_index += (x + 128) / 128 - 1; 293 gain_index += ( (x + 128) >> 7 ) - 1;
325 x = (x + 128) % 128; 294 x = ( (x + 128) & 127 );
326 } 295 }
327 } 296 }
328} 297}
@@ -349,12 +318,15 @@ static inline FIXP cplscale_math(FIXP x, int table, int i)
349 * @param out pointer to the output buffer 318 * @param out pointer to the output buffer
350 * @param chan 0: left or single channel, 1: right channel 319 * @param chan 0: left or single channel, 1: right channel
351 */ 320 */
352static inline void output_math(COOKContext *q, int16_t *out, int chan) 321static inline void output_math(COOKContext *q, register int16_t *out, int chan)
353{ 322{
354 int j; 323 register REAL_T * mono_output_ptr = q->mono_mdct_output;
355 324 register REAL_T * mono_output_end = mono_output_ptr + q->samples_per_channel;
356 for (j = 0; j < q->samples_per_channel; j++) { 325 out += chan;
357 out[chan + q->nb_channels * j] = 326 const int STEP = q->nb_channels;
358 av_clip(fixp_pow2(q->mono_mdct_output[j], -11), -32768, 32767); 327 while( mono_output_ptr < mono_output_end )
328 {
329 *out = CLIP_TO_15(fixp_pow2_neg(*mono_output_ptr++, 11));
330 out += STEP;
359 } 331 }
360} 332}