diff options
author | Jens Arnold <amiconn@rockbox.org> | 2008-11-24 18:40:49 +0000 |
---|---|---|
committer | Jens Arnold <amiconn@rockbox.org> | 2008-11-24 18:40:49 +0000 |
commit | 3761c0108cbfc6f88c4bf43fc13a38a2f7db0d6f (patch) | |
tree | a064992dc69635d60e7bc1cb106fc43c6a4e3a40 /apps | |
parent | 66c0cf2eb17158eec9d0cd2553481a2caf86e611 (diff) | |
download | rockbox-3761c0108cbfc6f88c4bf43fc13a38a2f7db0d6f.tar.gz rockbox-3761c0108cbfc6f88c4bf43fc13a38a2f7db0d6f.zip |
Branch optimisation in both the C files (giving branch-prediction hints to gcc, verified using -fprofile-arcs and gcov) and the asm files. The biggest effect is on ColdFire (-c1000: +8%, -c2000: +5%), but ARM also benefits a bit (less than 1% on ARM7TDMI, around 1% on ARM1136).
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@19199 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps')
-rw-r--r-- | apps/codecs/demac/libdemac/demac_config.h | 9 | ||||
-rw-r--r-- | apps/codecs/demac/libdemac/entropy.c | 17 | ||||
-rw-r--r-- | apps/codecs/demac/libdemac/filter.c | 36 | ||||
-rw-r--r-- | apps/codecs/demac/libdemac/predictor-arm.S | 27 | ||||
-rw-r--r-- | apps/codecs/demac/libdemac/predictor-cf.S | 46 | ||||
-rw-r--r-- | apps/codecs/demac/libdemac/predictor.c | 137 |
6 files changed, 147 insertions, 125 deletions
diff --git a/apps/codecs/demac/libdemac/demac_config.h b/apps/codecs/demac/libdemac/demac_config.h index dd3aaa3f9c..986e5376c4 100644 --- a/apps/codecs/demac/libdemac/demac_config.h +++ b/apps/codecs/demac/libdemac/demac_config.h | |||
@@ -70,6 +70,15 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | |||
70 | #define ICODE_ATTR | 70 | #define ICODE_ATTR |
71 | #define ICODE_ATTR_DEMAC | 71 | #define ICODE_ATTR_DEMAC |
72 | 72 | ||
73 | /* Use to give gcc hints on which branch is most likely taken */ | ||
74 | #if defined(__GNUC__) && __GNUC__ >= 3 | ||
75 | #define LIKELY(x) __builtin_expect(!!(x), 1) | ||
76 | #define UNLIKELY(x) __builtin_expect(!!(x), 0) | ||
77 | #else | ||
78 | #define LIKELY(x) (x) | ||
79 | #define UNLIKELY(x) (x) | ||
80 | #endif | ||
81 | |||
73 | #endif /* !ROCKBOX */ | 82 | #endif /* !ROCKBOX */ |
74 | 83 | ||
75 | /* Defaults */ | 84 | /* Defaults */ |
diff --git a/apps/codecs/demac/libdemac/entropy.c b/apps/codecs/demac/libdemac/entropy.c index 54ff226bce..e8561122a7 100644 --- a/apps/codecs/demac/libdemac/entropy.c +++ b/apps/codecs/demac/libdemac/entropy.c | |||
@@ -283,13 +283,13 @@ static inline void update_rice(struct rice_t* rice, int x) | |||
283 | { | 283 | { |
284 | rice->ksum += ((x + 1) / 2) - ((rice->ksum + 16) >> 5); | 284 | rice->ksum += ((x + 1) / 2) - ((rice->ksum + 16) >> 5); |
285 | 285 | ||
286 | if (rice->k == 0) { | 286 | if (UNLIKELY(rice->k == 0)) { |
287 | rice->k = 1; | 287 | rice->k = 1; |
288 | } else { | 288 | } else { |
289 | uint32_t lim = 1 << (rice->k + 4); | 289 | uint32_t lim = 1 << (rice->k + 4); |
290 | if (rice->ksum < lim) { | 290 | if (UNLIKELY(rice->ksum < lim)) { |
291 | rice->k--; | 291 | rice->k--; |
292 | } else if (rice->ksum >= 2 * lim) { | 292 | } else if (UNLIKELY(rice->ksum >= 2 * lim)) { |
293 | rice->k++; | 293 | rice->k++; |
294 | } | 294 | } |
295 | } | 295 | } |
@@ -300,11 +300,12 @@ static inline int entropy_decode3980(struct rice_t* rice) | |||
300 | int base, x, pivot, overflow; | 300 | int base, x, pivot, overflow; |
301 | 301 | ||
302 | pivot = rice->ksum >> 5; | 302 | pivot = rice->ksum >> 5; |
303 | if (pivot == 0) pivot=1; | 303 | if (UNLIKELY(pivot == 0)) |
304 | pivot=1; | ||
304 | 305 | ||
305 | overflow = range_get_symbol_3980(); | 306 | overflow = range_get_symbol_3980(); |
306 | 307 | ||
307 | if (overflow == (MODEL_ELEMENTS-1)) { | 308 | if (UNLIKELY(overflow == (MODEL_ELEMENTS-1))) { |
308 | overflow = range_decode_short() << 16; | 309 | overflow = range_decode_short() << 16; |
309 | overflow |= range_decode_short(); | 310 | overflow |= range_decode_short(); |
310 | } | 311 | } |
@@ -352,7 +353,7 @@ static inline int entropy_decode3970(struct rice_t* rice) | |||
352 | 353 | ||
353 | int overflow = range_get_symbol_3970(); | 354 | int overflow = range_get_symbol_3970(); |
354 | 355 | ||
355 | if (overflow == (MODEL_ELEMENTS - 1)) { | 356 | if (UNLIKELY(overflow == (MODEL_ELEMENTS - 1))) { |
356 | tmpk = range_decode_bits(5); | 357 | tmpk = range_decode_bits(5); |
357 | overflow = 0; | 358 | overflow = 0; |
358 | } else { | 359 | } else { |
@@ -435,13 +436,13 @@ int ICODE_ATTR_DEMAC entropy_decode(struct ape_ctx_t* ape_ctx, | |||
435 | memset(decoded1, 0, blockstodecode * sizeof(int32_t)); | 436 | memset(decoded1, 0, blockstodecode * sizeof(int32_t)); |
436 | } else { | 437 | } else { |
437 | if (ape_ctx->fileversion > 3970) { | 438 | if (ape_ctx->fileversion > 3970) { |
438 | while (blockstodecode--) { | 439 | while (LIKELY(blockstodecode--)) { |
439 | *(decoded0++) = entropy_decode3980(&riceY); | 440 | *(decoded0++) = entropy_decode3980(&riceY); |
440 | if (decoded1 != NULL) | 441 | if (decoded1 != NULL) |
441 | *(decoded1++) = entropy_decode3980(&riceX); | 442 | *(decoded1++) = entropy_decode3980(&riceX); |
442 | } | 443 | } |
443 | } else { | 444 | } else { |
444 | while (blockstodecode--) { | 445 | while (LIKELY(blockstodecode--)) { |
445 | *(decoded0++) = entropy_decode3970(&riceY); | 446 | *(decoded0++) = entropy_decode3970(&riceY); |
446 | if (decoded1 != NULL) | 447 | if (decoded1 != NULL) |
447 | *(decoded1++) = entropy_decode3970(&riceX); | 448 | *(decoded1++) = entropy_decode3970(&riceX); |
diff --git a/apps/codecs/demac/libdemac/filter.c b/apps/codecs/demac/libdemac/filter.c index 5601fffcd4..d66bdc69b0 100644 --- a/apps/codecs/demac/libdemac/filter.c +++ b/apps/codecs/demac/libdemac/filter.c | |||
@@ -100,7 +100,7 @@ struct filter_t { | |||
100 | #if defined(CPU_ARM) && (ARM_ARCH >= 6) | 100 | #if defined(CPU_ARM) && (ARM_ARCH >= 6) |
101 | #define SATURATE(x) ({int __res; asm("ssat %0, #16, %1" : "=r"(__res) : "r"(x)); __res; }) | 101 | #define SATURATE(x) ({int __res; asm("ssat %0, #16, %1" : "=r"(__res) : "r"(x)); __res; }) |
102 | #else | 102 | #else |
103 | #define SATURATE(x) (((x) == (int16_t)(x)) ? (x) : ((x) >> 31) ^ 0x7FFF); | 103 | #define SATURATE(x) (LIKELY((x) == (int16_t)(x)) ? (x) : ((x) >> 31) ^ 0x7FFF); |
104 | #endif | 104 | #endif |
105 | 105 | ||
106 | /* Apply the filter with state f to count entries in data[] */ | 106 | /* Apply the filter with state f to count entries in data[] */ |
@@ -109,20 +109,22 @@ static void ICODE_ATTR_DEMAC do_apply_filter_3980(struct filter_t* f, | |||
109 | int32_t* data, int count) | 109 | int32_t* data, int count) |
110 | { | 110 | { |
111 | int res; | 111 | int res; |
112 | int absres; | 112 | int absres; |
113 | 113 | ||
114 | #ifdef PREPARE_SCALARPRODUCT | 114 | #ifdef PREPARE_SCALARPRODUCT |
115 | PREPARE_SCALARPRODUCT | 115 | PREPARE_SCALARPRODUCT |
116 | #endif | 116 | #endif |
117 | 117 | ||
118 | while(count--) | 118 | while(LIKELY(count--)) |
119 | { | 119 | { |
120 | res = FP_TO_INT(scalarproduct(f->coeffs, f->delay - ORDER)); | 120 | res = FP_TO_INT(scalarproduct(f->coeffs, f->delay - ORDER)); |
121 | 121 | ||
122 | if (*data < 0) | 122 | if (LIKELY(*data != 0)) { |
123 | vector_add(f->coeffs, f->adaptcoeffs - ORDER); | 123 | if (*data < 0) |
124 | else if (*data > 0) | 124 | vector_add(f->coeffs, f->adaptcoeffs - ORDER); |
125 | vector_sub(f->coeffs, f->adaptcoeffs - ORDER); | 125 | else |
126 | vector_sub(f->coeffs, f->adaptcoeffs - ORDER); | ||
127 | } | ||
126 | 128 | ||
127 | res += *data; | 129 | res += *data; |
128 | 130 | ||
@@ -136,11 +138,11 @@ static void ICODE_ATTR_DEMAC do_apply_filter_3980(struct filter_t* f, | |||
136 | /* Update the adaption coefficients */ | 138 | /* Update the adaption coefficients */ |
137 | absres = (res < 0 ? -res : res); | 139 | absres = (res < 0 ? -res : res); |
138 | 140 | ||
139 | if (absres > (f->avg * 3)) | 141 | if (UNLIKELY(absres > (f->avg * 3))) |
140 | *f->adaptcoeffs = ((res >> 25) & 64) - 32; | 142 | *f->adaptcoeffs = ((res >> 25) & 64) - 32; |
141 | else if (absres > (f->avg * 4) / 3) | 143 | else if (absres > (f->avg * 4) / 3) |
142 | *f->adaptcoeffs = ((res >> 26) & 32) - 16; | 144 | *f->adaptcoeffs = ((res >> 26) & 32) - 16; |
143 | else if (absres > 0) | 145 | else if (LIKELY(absres > 0)) |
144 | *f->adaptcoeffs = ((res >> 27) & 16) - 8; | 146 | *f->adaptcoeffs = ((res >> 27) & 16) - 8; |
145 | else | 147 | else |
146 | *f->adaptcoeffs = 0; | 148 | *f->adaptcoeffs = 0; |
@@ -154,7 +156,7 @@ static void ICODE_ATTR_DEMAC do_apply_filter_3980(struct filter_t* f, | |||
154 | f->adaptcoeffs++; | 156 | f->adaptcoeffs++; |
155 | 157 | ||
156 | /* Have we filled the history buffer? */ | 158 | /* Have we filled the history buffer? */ |
157 | if (f->delay == f->history_end) { | 159 | if (UNLIKELY(f->delay == f->history_end)) { |
158 | memmove(f->coeffs + ORDER, f->delay - (ORDER*2), | 160 | memmove(f->coeffs + ORDER, f->delay - (ORDER*2), |
159 | (ORDER*2) * sizeof(filter_int)); | 161 | (ORDER*2) * sizeof(filter_int)); |
160 | f->adaptcoeffs = f->coeffs + ORDER*2; | 162 | f->adaptcoeffs = f->coeffs + ORDER*2; |
@@ -172,14 +174,16 @@ static void ICODE_ATTR_DEMAC do_apply_filter_3970(struct filter_t* f, | |||
172 | PREPARE_SCALARPRODUCT | 174 | PREPARE_SCALARPRODUCT |
173 | #endif | 175 | #endif |
174 | 176 | ||
175 | while(count--) | 177 | while(LIKELY(count--)) |
176 | { | 178 | { |
177 | res = FP_TO_INT(scalarproduct(f->coeffs, f->delay - ORDER)); | 179 | res = FP_TO_INT(scalarproduct(f->coeffs, f->delay - ORDER)); |
178 | 180 | ||
179 | if (*data < 0) | 181 | if (LIKELY(*data != 0)) { |
180 | vector_add(f->coeffs, f->adaptcoeffs - ORDER); | 182 | if (*data < 0) |
181 | else if (*data > 0) | 183 | vector_add(f->coeffs, f->adaptcoeffs - ORDER); |
182 | vector_sub(f->coeffs, f->adaptcoeffs - ORDER); | 184 | else |
185 | vector_sub(f->coeffs, f->adaptcoeffs - ORDER); | ||
186 | } | ||
183 | 187 | ||
184 | /* Convert res from (32-FRACBITS).FRACBITS fixed-point format to an | 188 | /* Convert res from (32-FRACBITS).FRACBITS fixed-point format to an |
185 | integer (rounding to nearest) and add the input value to | 189 | integer (rounding to nearest) and add the input value to |
@@ -199,7 +203,7 @@ static void ICODE_ATTR_DEMAC do_apply_filter_3970(struct filter_t* f, | |||
199 | f->adaptcoeffs++; | 203 | f->adaptcoeffs++; |
200 | 204 | ||
201 | /* Have we filled the history buffer? */ | 205 | /* Have we filled the history buffer? */ |
202 | if (f->delay == f->history_end) { | 206 | if (UNLIKELY(f->delay == f->history_end)) { |
203 | memmove(f->coeffs + ORDER, f->delay - (ORDER*2), | 207 | memmove(f->coeffs + ORDER, f->delay - (ORDER*2), |
204 | (ORDER*2) * sizeof(filter_int)); | 208 | (ORDER*2) * sizeof(filter_int)); |
205 | f->adaptcoeffs = f->coeffs + ORDER*2; | 209 | f->adaptcoeffs = f->coeffs + ORDER*2; |
diff --git a/apps/codecs/demac/libdemac/predictor-arm.S b/apps/codecs/demac/libdemac/predictor-arm.S index dfeba0dcc8..d62b6186f8 100644 --- a/apps/codecs/demac/libdemac/predictor-arm.S +++ b/apps/codecs/demac/libdemac/predictor-arm.S | |||
@@ -468,14 +468,24 @@ loop: | |||
468 | 468 | ||
469 | add r11, r12, #historybuffer @ r11 := &p->historybuffer[0] | 469 | add r11, r12, #historybuffer @ r11 := &p->historybuffer[0] |
470 | 470 | ||
471 | sub r10, r14, #PREDICTOR_HISTORY_SIZE*4 | 471 | sub r10, r14, #PREDICTOR_HISTORY_SIZE*4 |
472 | @ r10 := p->buf - PREDICTOR_HISTORY_SIZE | 472 | @ r10 := p->buf - PREDICTOR_HISTORY_SIZE |
473 | 473 | ||
474 | cmp r10, r11 | 474 | cmp r10, r11 |
475 | bne endofloop | 475 | beq move_hist @ The history buffer is full, we need to do a memmove |
476 | 476 | ||
477 | @ The history buffer is full, we need to do a memmove: | 477 | @ Check loop count |
478 | ldr r0, [sp, #8] | ||
479 | subs r0, r0, #1 | ||
480 | strne r0, [sp, #8] | ||
481 | bne loop | ||
482 | |||
483 | done: | ||
484 | str r14, [r12] @ Save value of p->buf | ||
485 | add sp, sp, #12 @ Don't bother restoring r1-r3 | ||
486 | ldmia sp!, {r4-r11, pc} | ||
478 | 487 | ||
488 | move_hist: | ||
479 | @ dest = r11 (p->historybuffer) | 489 | @ dest = r11 (p->historybuffer) |
480 | @ src = r14 (p->buf) | 490 | @ src = r14 (p->buf) |
481 | @ n = 200 | 491 | @ n = 200 |
@@ -493,15 +503,10 @@ loop: | |||
493 | 503 | ||
494 | add r14, r12, #historybuffer @ p->buf = &p->historybuffer[0] | 504 | add r14, r12, #historybuffer @ p->buf = &p->historybuffer[0] |
495 | 505 | ||
496 | 506 | @ Check loop count | |
497 | endofloop: | ||
498 | @ Check loop count | ||
499 | ldr r0, [sp, #8] | 507 | ldr r0, [sp, #8] |
500 | subs r0, r0, #1 | 508 | subs r0, r0, #1 |
501 | strne r0, [sp, #8] | 509 | strne r0, [sp, #8] |
502 | bne loop | 510 | bne loop |
503 | 511 | ||
504 | done: | 512 | b done |
505 | str r14, [r12] @ Save value of p->buf | ||
506 | add sp, sp, #12 @ Don't bother restoring r1-r3 | ||
507 | ldmia sp!, {r4-r11, pc} | ||
diff --git a/apps/codecs/demac/libdemac/predictor-cf.S b/apps/codecs/demac/libdemac/predictor-cf.S index b12d0932b0..0a1ffe9ead 100644 --- a/apps/codecs/demac/libdemac/predictor-cf.S +++ b/apps/codecs/demac/libdemac/predictor-cf.S | |||
@@ -486,10 +486,18 @@ predictor_decode_stereo: | |||
486 | | %a3 = &p->historybuffer[PREDICTOR_HISTORY_SIZE] | 486 | | %a3 = &p->historybuffer[PREDICTOR_HISTORY_SIZE] |
487 | 487 | ||
488 | cmp.l %a3, %a5 | 488 | cmp.l %a3, %a5 |
489 | bne.s .endofloop | 489 | beq.s .move_hist | The history buffer is full, we need to do a memmove |
490 | 490 | ||
491 | | The history buffer is full, we need to do a memmove: | 491 | subq.l #1, (8,%sp) | decrease loop count |
492 | bne.w .loop | ||
492 | 493 | ||
494 | .done: | ||
495 | move.l %a5, (%a6) | Save value of p->buf | ||
496 | movem.l (3*4,%sp), %d2-%d7/%a2-%a6 | ||
497 | lea.l (14*4,%sp), %sp | ||
498 | rts | ||
499 | |||
500 | .move_hist: | ||
493 | lea.l (historybuffer,%a6), %a3 | 501 | lea.l (historybuffer,%a6), %a3 |
494 | 502 | ||
495 | | dest = %a3 (p->historybuffer) | 503 | | dest = %a3 (p->historybuffer) |
@@ -497,33 +505,19 @@ predictor_decode_stereo: | |||
497 | | n = 200 | 505 | | n = 200 |
498 | 506 | ||
499 | movem.l (%a5), %d0-%d7/%a0-%a1 | 40 bytes | 507 | movem.l (%a5), %d0-%d7/%a0-%a1 | 40 bytes |
500 | lea.l (40,%a5), %a5 | ||
501 | movem.l %d0-%d7/%a0-%a1, (%a3) | 508 | movem.l %d0-%d7/%a0-%a1, (%a3) |
502 | lea.l (40,%a3), %a3 | 509 | movem.l (40,%a5), %d0-%d7/%a0-%a1 | 40 bytes |
503 | movem.l (%a5), %d0-%d7/%a0-%a1 | 40 bytes | 510 | movem.l %d0-%d7/%a0-%a1, (40,%a3) |
504 | lea.l (40,%a5), %a5 | 511 | movem.l (80,%a5), %d0-%d7/%a0-%a1 | 40 bytes |
505 | movem.l %d0-%d7/%a0-%a1, (%a3) | 512 | movem.l %d0-%d7/%a0-%a1, (80,%a3) |
506 | lea.l (40,%a3), %a3 | 513 | movem.l (120,%a5), %d0-%d7/%a0-%a1 | 40 bytes |
507 | movem.l (%a5), %d0-%d7/%a0-%a1 | 40 bytes | 514 | movem.l %d0-%d7/%a0-%a1, (120,%a3) |
508 | lea.l (40,%a5), %a5 | 515 | movem.l (160,%a5), %d0-%d7/%a0-%a1 | 40 bytes |
509 | movem.l %d0-%d7/%a0-%a1, (%a3) | 516 | movem.l %d0-%d7/%a0-%a1, (160,%a3) |
510 | lea.l (40,%a3), %a3 | ||
511 | movem.l (%a5), %d0-%d7/%a0-%a1 | 40 bytes | ||
512 | lea.l (40,%a5), %a5 | ||
513 | movem.l %d0-%d7/%a0-%a1, (%a3) | ||
514 | lea.l (40,%a3), %a3 | ||
515 | movem.l (%a5), %d0-%d7/%a0-%a1 | 40 bytes | ||
516 | lea.l (40,%a5), %a5 | ||
517 | movem.l %d0-%d7/%a0-%a1, (%a3) | ||
518 | lea.l (40,%a3), %a3 | ||
519 | 517 | ||
520 | lea.l (historybuffer,%a6), %a5 | p->buf = &p->historybuffer[0] | 518 | move.l %a3, %a5 | p->buf = &p->historybuffer[0] |
521 | 519 | ||
522 | .endofloop: | ||
523 | subq.l #1, (8,%sp) | decrease loop count | 520 | subq.l #1, (8,%sp) | decrease loop count |
524 | bne.w .loop | 521 | bne.w .loop |
525 | 522 | ||
526 | move.l %a5, (%a6) | Save value of p->buf | 523 | bra.s .done |
527 | movem.l (3*4,%sp), %d2-%d7/%a2-%a6 | ||
528 | lea.l (14*4,%sp), %sp | ||
529 | rts | ||
diff --git a/apps/codecs/demac/libdemac/predictor.c b/apps/codecs/demac/libdemac/predictor.c index 1a9b48e0ca..67a17f4b5d 100644 --- a/apps/codecs/demac/libdemac/predictor.c +++ b/apps/codecs/demac/libdemac/predictor.c | |||
@@ -75,7 +75,7 @@ int ICODE_ATTR_DEMAC predictor_decode_stereo(struct predictor_t* p, | |||
75 | { | 75 | { |
76 | int32_t predictionA, predictionB; | 76 | int32_t predictionA, predictionB; |
77 | 77 | ||
78 | while (count--) | 78 | while (LIKELY(count--)) |
79 | { | 79 | { |
80 | /* Predictor Y */ | 80 | /* Predictor Y */ |
81 | p->buf[YDELAYA] = p->YlastA; | 81 | p->buf[YDELAYA] = p->YlastA; |
@@ -134,60 +134,66 @@ int ICODE_ATTR_DEMAC predictor_decode_stereo(struct predictor_t* p, | |||
134 | p->XlastA = *decoded1 + ((predictionA + (predictionB >> 1)) >> 10); | 134 | p->XlastA = *decoded1 + ((predictionA + (predictionB >> 1)) >> 10); |
135 | p->XfilterA = p->XlastA + ((p->XfilterA * 31) >> 5); | 135 | p->XfilterA = p->XlastA + ((p->XfilterA * 31) >> 5); |
136 | 136 | ||
137 | if (*decoded0 > 0) | 137 | if (LIKELY(*decoded0 != 0)) |
138 | { | 138 | { |
139 | p->YcoeffsA[0] -= p->buf[YADAPTCOEFFSA]; | 139 | if (*decoded0 > 0) |
140 | p->YcoeffsA[1] -= p->buf[YADAPTCOEFFSA-1]; | 140 | { |
141 | p->YcoeffsA[2] -= p->buf[YADAPTCOEFFSA-2]; | 141 | p->YcoeffsA[0] -= p->buf[YADAPTCOEFFSA]; |
142 | p->YcoeffsA[3] -= p->buf[YADAPTCOEFFSA-3]; | 142 | p->YcoeffsA[1] -= p->buf[YADAPTCOEFFSA-1]; |
143 | 143 | p->YcoeffsA[2] -= p->buf[YADAPTCOEFFSA-2]; | |
144 | p->YcoeffsB[0] -= p->buf[YADAPTCOEFFSB]; | 144 | p->YcoeffsA[3] -= p->buf[YADAPTCOEFFSA-3]; |
145 | p->YcoeffsB[1] -= p->buf[YADAPTCOEFFSB-1]; | 145 | |
146 | p->YcoeffsB[2] -= p->buf[YADAPTCOEFFSB-2]; | 146 | p->YcoeffsB[0] -= p->buf[YADAPTCOEFFSB]; |
147 | p->YcoeffsB[3] -= p->buf[YADAPTCOEFFSB-3]; | 147 | p->YcoeffsB[1] -= p->buf[YADAPTCOEFFSB-1]; |
148 | p->YcoeffsB[4] -= p->buf[YADAPTCOEFFSB-4]; | 148 | p->YcoeffsB[2] -= p->buf[YADAPTCOEFFSB-2]; |
149 | } | 149 | p->YcoeffsB[3] -= p->buf[YADAPTCOEFFSB-3]; |
150 | else if (*decoded0 < 0) | 150 | p->YcoeffsB[4] -= p->buf[YADAPTCOEFFSB-4]; |
151 | { | 151 | } |
152 | p->YcoeffsA[0] += p->buf[YADAPTCOEFFSA]; | 152 | else |
153 | p->YcoeffsA[1] += p->buf[YADAPTCOEFFSA-1]; | 153 | { |
154 | p->YcoeffsA[2] += p->buf[YADAPTCOEFFSA-2]; | 154 | p->YcoeffsA[0] += p->buf[YADAPTCOEFFSA]; |
155 | p->YcoeffsA[3] += p->buf[YADAPTCOEFFSA-3]; | 155 | p->YcoeffsA[1] += p->buf[YADAPTCOEFFSA-1]; |
156 | 156 | p->YcoeffsA[2] += p->buf[YADAPTCOEFFSA-2]; | |
157 | p->YcoeffsB[0] += p->buf[YADAPTCOEFFSB]; | 157 | p->YcoeffsA[3] += p->buf[YADAPTCOEFFSA-3]; |
158 | p->YcoeffsB[1] += p->buf[YADAPTCOEFFSB-1]; | 158 | |
159 | p->YcoeffsB[2] += p->buf[YADAPTCOEFFSB-2]; | 159 | p->YcoeffsB[0] += p->buf[YADAPTCOEFFSB]; |
160 | p->YcoeffsB[3] += p->buf[YADAPTCOEFFSB-3]; | 160 | p->YcoeffsB[1] += p->buf[YADAPTCOEFFSB-1]; |
161 | p->YcoeffsB[4] += p->buf[YADAPTCOEFFSB-4]; | 161 | p->YcoeffsB[2] += p->buf[YADAPTCOEFFSB-2]; |
162 | p->YcoeffsB[3] += p->buf[YADAPTCOEFFSB-3]; | ||
163 | p->YcoeffsB[4] += p->buf[YADAPTCOEFFSB-4]; | ||
164 | } | ||
162 | } | 165 | } |
163 | 166 | ||
164 | *(decoded0++) = p->YfilterA; | 167 | *(decoded0++) = p->YfilterA; |
165 | 168 | ||
166 | if (*decoded1 > 0) | 169 | if (LIKELY(*decoded1 != 0)) |
167 | { | ||
168 | p->XcoeffsA[0] -= p->buf[XADAPTCOEFFSA]; | ||
169 | p->XcoeffsA[1] -= p->buf[XADAPTCOEFFSA-1]; | ||
170 | p->XcoeffsA[2] -= p->buf[XADAPTCOEFFSA-2]; | ||
171 | p->XcoeffsA[3] -= p->buf[XADAPTCOEFFSA-3]; | ||
172 | |||
173 | p->XcoeffsB[0] -= p->buf[XADAPTCOEFFSB]; | ||
174 | p->XcoeffsB[1] -= p->buf[XADAPTCOEFFSB-1]; | ||
175 | p->XcoeffsB[2] -= p->buf[XADAPTCOEFFSB-2]; | ||
176 | p->XcoeffsB[3] -= p->buf[XADAPTCOEFFSB-3]; | ||
177 | p->XcoeffsB[4] -= p->buf[XADAPTCOEFFSB-4]; | ||
178 | } | ||
179 | else if (*decoded1 < 0) | ||
180 | { | 170 | { |
181 | p->XcoeffsA[0] += p->buf[XADAPTCOEFFSA]; | 171 | if (*decoded1 > 0) |
182 | p->XcoeffsA[1] += p->buf[XADAPTCOEFFSA-1]; | 172 | { |
183 | p->XcoeffsA[2] += p->buf[XADAPTCOEFFSA-2]; | 173 | p->XcoeffsA[0] -= p->buf[XADAPTCOEFFSA]; |
184 | p->XcoeffsA[3] += p->buf[XADAPTCOEFFSA-3]; | 174 | p->XcoeffsA[1] -= p->buf[XADAPTCOEFFSA-1]; |
185 | 175 | p->XcoeffsA[2] -= p->buf[XADAPTCOEFFSA-2]; | |
186 | p->XcoeffsB[0] += p->buf[XADAPTCOEFFSB]; | 176 | p->XcoeffsA[3] -= p->buf[XADAPTCOEFFSA-3]; |
187 | p->XcoeffsB[1] += p->buf[XADAPTCOEFFSB-1]; | 177 | |
188 | p->XcoeffsB[2] += p->buf[XADAPTCOEFFSB-2]; | 178 | p->XcoeffsB[0] -= p->buf[XADAPTCOEFFSB]; |
189 | p->XcoeffsB[3] += p->buf[XADAPTCOEFFSB-3]; | 179 | p->XcoeffsB[1] -= p->buf[XADAPTCOEFFSB-1]; |
190 | p->XcoeffsB[4] += p->buf[XADAPTCOEFFSB-4]; | 180 | p->XcoeffsB[2] -= p->buf[XADAPTCOEFFSB-2]; |
181 | p->XcoeffsB[3] -= p->buf[XADAPTCOEFFSB-3]; | ||
182 | p->XcoeffsB[4] -= p->buf[XADAPTCOEFFSB-4]; | ||
183 | } | ||
184 | else | ||
185 | { | ||
186 | p->XcoeffsA[0] += p->buf[XADAPTCOEFFSA]; | ||
187 | p->XcoeffsA[1] += p->buf[XADAPTCOEFFSA-1]; | ||
188 | p->XcoeffsA[2] += p->buf[XADAPTCOEFFSA-2]; | ||
189 | p->XcoeffsA[3] += p->buf[XADAPTCOEFFSA-3]; | ||
190 | |||
191 | p->XcoeffsB[0] += p->buf[XADAPTCOEFFSB]; | ||
192 | p->XcoeffsB[1] += p->buf[XADAPTCOEFFSB-1]; | ||
193 | p->XcoeffsB[2] += p->buf[XADAPTCOEFFSB-2]; | ||
194 | p->XcoeffsB[3] += p->buf[XADAPTCOEFFSB-3]; | ||
195 | p->XcoeffsB[4] += p->buf[XADAPTCOEFFSB-4]; | ||
196 | } | ||
191 | } | 197 | } |
192 | 198 | ||
193 | *(decoded1++) = p->XfilterA; | 199 | *(decoded1++) = p->XfilterA; |
@@ -196,7 +202,7 @@ int ICODE_ATTR_DEMAC predictor_decode_stereo(struct predictor_t* p, | |||
196 | p->buf++; | 202 | p->buf++; |
197 | 203 | ||
198 | /* Have we filled the history buffer? */ | 204 | /* Have we filled the history buffer? */ |
199 | if (p->buf == p->historybuffer + PREDICTOR_HISTORY_SIZE) { | 205 | if (UNLIKELY(p->buf == p->historybuffer + PREDICTOR_HISTORY_SIZE)) { |
200 | memmove(p->historybuffer, p->buf, | 206 | memmove(p->historybuffer, p->buf, |
201 | PREDICTOR_SIZE * sizeof(int32_t)); | 207 | PREDICTOR_SIZE * sizeof(int32_t)); |
202 | p->buf = p->historybuffer; | 208 | p->buf = p->historybuffer; |
@@ -215,7 +221,7 @@ int ICODE_ATTR_DEMAC predictor_decode_mono(struct predictor_t* p, | |||
215 | 221 | ||
216 | currentA = p->YlastA; | 222 | currentA = p->YlastA; |
217 | 223 | ||
218 | while (count--) | 224 | while (LIKELY(count--)) |
219 | { | 225 | { |
220 | A = *decoded0; | 226 | A = *decoded0; |
221 | 227 | ||
@@ -232,25 +238,28 @@ int ICODE_ATTR_DEMAC predictor_decode_mono(struct predictor_t* p, | |||
232 | p->buf[YADAPTCOEFFSA] = SIGN(p->buf[YDELAYA]); | 238 | p->buf[YADAPTCOEFFSA] = SIGN(p->buf[YDELAYA]); |
233 | p->buf[YADAPTCOEFFSA-1] = SIGN(p->buf[YDELAYA-1]); | 239 | p->buf[YADAPTCOEFFSA-1] = SIGN(p->buf[YDELAYA-1]); |
234 | 240 | ||
235 | if (A > 0) | 241 | if (LIKELY(A != 0)) |
236 | { | ||
237 | p->YcoeffsA[0] -= p->buf[YADAPTCOEFFSA]; | ||
238 | p->YcoeffsA[1] -= p->buf[YADAPTCOEFFSA-1]; | ||
239 | p->YcoeffsA[2] -= p->buf[YADAPTCOEFFSA-2]; | ||
240 | p->YcoeffsA[3] -= p->buf[YADAPTCOEFFSA-3]; | ||
241 | } | ||
242 | else if (A < 0) | ||
243 | { | 242 | { |
244 | p->YcoeffsA[0] += p->buf[YADAPTCOEFFSA]; | 243 | if (A > 0) |
245 | p->YcoeffsA[1] += p->buf[YADAPTCOEFFSA-1]; | 244 | { |
246 | p->YcoeffsA[2] += p->buf[YADAPTCOEFFSA-2]; | 245 | p->YcoeffsA[0] -= p->buf[YADAPTCOEFFSA]; |
247 | p->YcoeffsA[3] += p->buf[YADAPTCOEFFSA-3]; | 246 | p->YcoeffsA[1] -= p->buf[YADAPTCOEFFSA-1]; |
247 | p->YcoeffsA[2] -= p->buf[YADAPTCOEFFSA-2]; | ||
248 | p->YcoeffsA[3] -= p->buf[YADAPTCOEFFSA-3]; | ||
249 | } | ||
250 | else | ||
251 | { | ||
252 | p->YcoeffsA[0] += p->buf[YADAPTCOEFFSA]; | ||
253 | p->YcoeffsA[1] += p->buf[YADAPTCOEFFSA-1]; | ||
254 | p->YcoeffsA[2] += p->buf[YADAPTCOEFFSA-2]; | ||
255 | p->YcoeffsA[3] += p->buf[YADAPTCOEFFSA-3]; | ||
256 | } | ||
248 | } | 257 | } |
249 | 258 | ||
250 | p->buf++; | 259 | p->buf++; |
251 | 260 | ||
252 | /* Have we filled the history buffer? */ | 261 | /* Have we filled the history buffer? */ |
253 | if (p->buf == p->historybuffer + PREDICTOR_HISTORY_SIZE) { | 262 | if (UNLIKELY(p->buf == p->historybuffer + PREDICTOR_HISTORY_SIZE)) { |
254 | memmove(p->historybuffer, p->buf, | 263 | memmove(p->historybuffer, p->buf, |
255 | PREDICTOR_SIZE * sizeof(int32_t)); | 264 | PREDICTOR_SIZE * sizeof(int32_t)); |
256 | p->buf = p->historybuffer; | 265 | p->buf = p->historybuffer; |