diff options
Diffstat (limited to 'apps')
-rw-r--r-- | apps/codecs/lib/asm_mcf5249.h | 8 | ||||
-rw-r--r-- | apps/codecs/libtremor/asm_arm.h | 134 | ||||
-rw-r--r-- | apps/codecs/libtremor/asm_mcf5249.h | 21 | ||||
-rw-r--r-- | apps/codecs/libtremor/block.c | 222 | ||||
-rw-r--r-- | apps/codecs/libtremor/config-tremor.h | 29 | ||||
-rw-r--r-- | apps/codecs/libtremor/ivorbiscodec.h | 4 | ||||
-rw-r--r-- | apps/codecs/libtremor/mapping0.c | 2 | ||||
-rw-r--r-- | apps/codecs/libtremor/misc.h | 20 | ||||
-rw-r--r-- | apps/codecs/libtremor/oggmalloc.c | 24 | ||||
-rw-r--r-- | apps/codecs/libtremor/os_types.h | 4 | ||||
-rw-r--r-- | apps/codecs/libtremor/res012.c | 2 | ||||
-rw-r--r-- | apps/codecs/libtremor/synthesis.c | 36 | ||||
-rw-r--r-- | apps/codecs/libtremor/window.c | 12 | ||||
-rw-r--r-- | apps/codecs/libtremor/window_lookup.h | 4 |
14 files changed, 335 insertions, 187 deletions
diff --git a/apps/codecs/lib/asm_mcf5249.h b/apps/codecs/lib/asm_mcf5249.h index 20899f0a5b..e3dc8dd684 100644 --- a/apps/codecs/lib/asm_mcf5249.h +++ b/apps/codecs/lib/asm_mcf5249.h | |||
@@ -143,7 +143,7 @@ static inline | |||
143 | void vect_add(int32_t *x, int32_t *y, int n) | 143 | void vect_add(int32_t *x, int32_t *y, int n) |
144 | { | 144 | { |
145 | /* align to 16 bytes */ | 145 | /* align to 16 bytes */ |
146 | while(n>0 && (int)x&16) { | 146 | while(n>0 && (int)x&15) { |
147 | *x++ += *y++; | 147 | *x++ += *y++; |
148 | n--; | 148 | n--; |
149 | } | 149 | } |
@@ -177,7 +177,7 @@ static inline | |||
177 | void vect_copy(int32_t *x, int32_t *y, int n) | 177 | void vect_copy(int32_t *x, int32_t *y, int n) |
178 | { | 178 | { |
179 | /* align to 16 bytes */ | 179 | /* align to 16 bytes */ |
180 | while(n>0 && (int)x&16) { | 180 | while(n>0 && (int)x&15) { |
181 | *x++ = *y++; | 181 | *x++ = *y++; |
182 | n--; | 182 | n--; |
183 | } | 183 | } |
@@ -204,7 +204,7 @@ static inline | |||
204 | void vect_mult_fw(int32_t *data, int32_t *window, int n) | 204 | void vect_mult_fw(int32_t *data, int32_t *window, int n) |
205 | { | 205 | { |
206 | /* ensure data is aligned to 16-bytes */ | 206 | /* ensure data is aligned to 16-bytes */ |
207 | while(n>0 && (int)data%16) { | 207 | while(n>0 && (int)data&15) { |
208 | *data = MULT31(*data, *window); | 208 | *data = MULT31(*data, *window); |
209 | data++; | 209 | data++; |
210 | window++; | 210 | window++; |
@@ -258,7 +258,7 @@ static inline | |||
258 | void vect_mult_bw(int32_t *data, int32_t *window, int n) | 258 | void vect_mult_bw(int32_t *data, int32_t *window, int n) |
259 | { | 259 | { |
260 | /* ensure at least data is aligned to 16-bytes */ | 260 | /* ensure at least data is aligned to 16-bytes */ |
261 | while(n>0 && (int)data%16) { | 261 | while(n>0 && (int)data&15) { |
262 | *data = MULT31(*data, *window); | 262 | *data = MULT31(*data, *window); |
263 | data++; | 263 | data++; |
264 | window--; | 264 | window--; |
diff --git a/apps/codecs/libtremor/asm_arm.h b/apps/codecs/libtremor/asm_arm.h index bc09ac5170..5a8109841f 100644 --- a/apps/codecs/libtremor/asm_arm.h +++ b/apps/codecs/libtremor/asm_arm.h | |||
@@ -99,104 +99,120 @@ static inline void XNPROD31(ogg_int32_t a, ogg_int32_t b, | |||
99 | #define _V_VECT_OPS | 99 | #define _V_VECT_OPS |
100 | 100 | ||
101 | /* asm versions of vector operations for block.c, window.c */ | 101 | /* asm versions of vector operations for block.c, window.c */ |
102 | /* SOME IMPORTANT NOTES: this implementation of vect_mult_bw does | ||
103 | NOT do a final shift, meaning that the result of vect_mult_bw is | ||
104 | only 31 bits not 32. This is so that we can do the shift in-place | ||
105 | in vect_add_xxxx instead to save one instruction for each mult on arm */ | ||
102 | static inline | 106 | static inline |
103 | void vect_add(ogg_int32_t *x, ogg_int32_t *y, int n) | 107 | void vect_add_right_left(ogg_int32_t *x, const ogg_int32_t *y, int n) |
104 | { | 108 | { |
105 | while (n>=4) { | 109 | /* first arg is right subframe of previous frame and second arg |
106 | asm volatile ("ldmia %[x], {r0, r1, r2, r3};" | 110 | is left subframe of current frame. overlap left onto right overwriting |
111 | the right subframe */ | ||
112 | |||
113 | do{ | ||
114 | asm volatile ( | ||
115 | "ldmia %[x], {r0, r1, r2, r3};" | ||
107 | "ldmia %[y]!, {r4, r5, r6, r7};" | 116 | "ldmia %[y]!, {r4, r5, r6, r7};" |
108 | "add r0, r0, r4;" | 117 | "add r0, r4, r0, lsl #1;" |
109 | "add r1, r1, r5;" | 118 | "add r1, r5, r1, lsl #1;" |
110 | "add r2, r2, r6;" | 119 | "add r2, r6, r2, lsl #1;" |
111 | "add r3, r3, r7;" | 120 | "add r3, r7, r3, lsl #1;" |
121 | "stmia %[x]!, {r0, r1, r2, r3};" | ||
122 | "ldmia %[x], {r0, r1, r2, r3};" | ||
123 | "ldmia %[y]!, {r4, r5, r6, r7};" | ||
124 | "add r0, r4, r0, lsl #1;" | ||
125 | "add r1, r5, r1, lsl #1;" | ||
126 | "add r2, r6, r2, lsl #1;" | ||
127 | "add r3, r7, r3, lsl #1;" | ||
112 | "stmia %[x]!, {r0, r1, r2, r3};" | 128 | "stmia %[x]!, {r0, r1, r2, r3};" |
113 | : [x] "+r" (x), [y] "+r" (y) | 129 | : [x] "+r" (x), [y] "+r" (y) |
114 | : : "r0", "r1", "r2", "r3", | 130 | : : "r0", "r1", "r2", "r3", |
115 | "r4", "r5", "r6", "r7", | 131 | "r4", "r5", "r6", "r7", |
116 | "memory"); | 132 | "memory"); |
117 | n -= 4; | 133 | n -= 8; |
118 | } | 134 | } while (n); |
119 | /* add final elements */ | ||
120 | while (n>0) { | ||
121 | *x++ += *y++; | ||
122 | n--; | ||
123 | } | ||
124 | } | 135 | } |
125 | 136 | ||
126 | static inline | 137 | static inline |
127 | void vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n) | 138 | void vect_add_left_right(ogg_int32_t *x, const ogg_int32_t *y, int n) |
128 | { | 139 | { |
129 | while (n>=4) { | 140 | /* first arg is left subframe of current frame and second arg |
130 | asm volatile ("ldmia %[y]!, {r0, r1, r2, r3};" | 141 | is right subframe of previous frame. overlap right onto left overwriting |
142 | the LEFT subframe */ | ||
143 | do{ | ||
144 | asm volatile ( | ||
145 | "ldmia %[x], {r0, r1, r2, r3};" | ||
146 | "ldmia %[y]!, {r4, r5, r6, r7};" | ||
147 | "add r0, r0, r4, lsl #1;" | ||
148 | "add r1, r1, r5, lsl #1;" | ||
149 | "add r2, r2, r6, lsl #1;" | ||
150 | "add r3, r3, r7, lsl #1;" | ||
151 | "stmia %[x]!, {r0, r1, r2, r3};" | ||
152 | "ldmia %[x], {r0, r1, r2, r3};" | ||
153 | "ldmia %[y]!, {r4, r5, r6, r7};" | ||
154 | "add r0, r0, r4, lsl #1;" | ||
155 | "add r1, r1, r5, lsl #1;" | ||
156 | "add r2, r2, r6, lsl #1;" | ||
157 | "add r3, r3, r7, lsl #1;" | ||
131 | "stmia %[x]!, {r0, r1, r2, r3};" | 158 | "stmia %[x]!, {r0, r1, r2, r3};" |
132 | : [x] "+r" (x), [y] "+r" (y) | 159 | : [x] "+r" (x), [y] "+r" (y) |
133 | : : "r0", "r1", "r2", "r3", | 160 | : : "r0", "r1", "r2", "r3", |
161 | "r4", "r5", "r6", "r7", | ||
134 | "memory"); | 162 | "memory"); |
135 | n -= 4; | 163 | n -= 8; |
136 | } | 164 | } while (n); |
137 | /* copy final elements */ | ||
138 | while (n>0) { | ||
139 | *x++ = *y++; | ||
140 | n--; | ||
141 | } | ||
142 | } | 165 | } |
143 | 166 | ||
144 | static inline | 167 | static inline |
145 | void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n) | 168 | void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n) |
146 | { | 169 | { |
147 | while (n>=4) { | 170 | /* Note, mult_fw uses MULT31 */ |
148 | asm volatile ("ldmia %[d], {r0, r1, r2, r3};" | 171 | do{ |
172 | asm volatile ( | ||
173 | "ldmia %[d], {r0, r1, r2, r3};" | ||
149 | "ldmia %[w]!, {r4, r5, r6, r7};" | 174 | "ldmia %[w]!, {r4, r5, r6, r7};" |
150 | "smull r8, r9, r0, r4;" | 175 | "smull r8, r0, r4, r0;" |
151 | "mov r0, r9, lsl #1;" | 176 | "mov r0, r0, lsl #1;" |
152 | "smull r8, r9, r1, r5;" | 177 | "smull r8, r1, r5, r1;" |
153 | "mov r1, r9, lsl #1;" | 178 | "mov r1, r1, lsl #1;" |
154 | "smull r8, r9, r2, r6;" | 179 | "smull r8, r2, r6, r2;" |
155 | "mov r2, r9, lsl #1;" | 180 | "mov r2, r2, lsl #1;" |
156 | "smull r8, r9, r3, r7;" | 181 | "smull r8, r3, r7, r3;" |
157 | "mov r3, r9, lsl #1;" | 182 | "mov r3, r3, lsl #1;" |
158 | "stmia %[d]!, {r0, r1, r2, r3};" | 183 | "stmia %[d]!, {r0, r1, r2, r3};" |
159 | : [d] "+r" (data), [w] "+r" (window) | 184 | : [d] "+r" (data), [w] "+r" (window) |
160 | : : "r0", "r1", "r2", "r3", | 185 | : : "r0", "r1", "r2", "r3", |
161 | "r4", "r5", "r6", "r7", "r8", "r9", | 186 | "r4", "r5", "r6", "r7", "r8", |
162 | "memory", "cc"); | 187 | "memory", "cc"); |
163 | n -= 4; | 188 | n -= 4; |
164 | } | 189 | } while (n); |
165 | while(n>0) { | ||
166 | *data = MULT31(*data, *window); | ||
167 | data++; | ||
168 | window++; | ||
169 | n--; | ||
170 | } | ||
171 | } | 190 | } |
172 | 191 | ||
173 | static inline | 192 | static inline |
174 | void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n) | 193 | void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n) |
175 | { | 194 | { |
176 | while (n>=4) { | 195 | /* NOTE mult_bw uses MULT_32 i.e. doesn't shift result left at end */ |
196 | /* On ARM, we can do the shift at the same time as the overlap-add */ | ||
197 | do{ | ||
177 | asm volatile ("ldmia %[d], {r0, r1, r2, r3};" | 198 | asm volatile ("ldmia %[d], {r0, r1, r2, r3};" |
178 | "ldmda %[w]!, {r4, r5, r6, r7};" | 199 | "ldmda %[w]!, {r4, r5, r6, r7};" |
179 | "smull r8, r9, r0, r7;" | 200 | "smull r8, r0, r7, r0;" |
180 | "mov r0, r9, lsl #1;" | 201 | "smull r7, r1, r6, r1;" |
181 | "smull r8, r9, r1, r6;" | 202 | "smull r6, r2, r5, r2;" |
182 | "mov r1, r9, lsl #1;" | 203 | "smull r5, r3, r4, r3;" |
183 | "smull r8, r9, r2, r5;" | ||
184 | "mov r2, r9, lsl #1;" | ||
185 | "smull r8, r9, r3, r4;" | ||
186 | "mov r3, r9, lsl #1;" | ||
187 | "stmia %[d]!, {r0, r1, r2, r3};" | 204 | "stmia %[d]!, {r0, r1, r2, r3};" |
188 | : [d] "+r" (data), [w] "+r" (window) | 205 | : [d] "+r" (data), [w] "+r" (window) |
189 | : : "r0", "r1", "r2", "r3", | 206 | : : "r0", "r1", "r2", "r3", |
190 | "r4", "r5", "r6", "r7", "r8", "r9", | 207 | "r4", "r5", "r6", "r7", "r8", |
191 | "memory", "cc"); | 208 | "memory", "cc"); |
192 | n -= 4; | 209 | n -= 4; |
193 | } | 210 | } while (n); |
194 | while(n>0) { | 211 | } |
195 | *data = MULT31(*data, *window); | 212 | |
196 | data++; | 213 | static inline void vect_copy(ogg_int32_t *x, const ogg_int32_t *y, int n) |
197 | window--; | 214 | { |
198 | n--; | 215 | memcpy(x,y,n*sizeof(ogg_int32_t)); |
199 | } | ||
200 | } | 216 | } |
201 | 217 | ||
202 | #endif | 218 | #endif |
diff --git a/apps/codecs/libtremor/asm_mcf5249.h b/apps/codecs/libtremor/asm_mcf5249.h index 64dfb1b785..224a861afd 100644 --- a/apps/codecs/libtremor/asm_mcf5249.h +++ b/apps/codecs/libtremor/asm_mcf5249.h | |||
@@ -140,10 +140,10 @@ void XNPROD31(ogg_int32_t a, ogg_int32_t b, | |||
140 | /* asm versions of vector operations for block.c, window.c */ | 140 | /* asm versions of vector operations for block.c, window.c */ |
141 | /* assumes MAC is initialized & accumulators cleared */ | 141 | /* assumes MAC is initialized & accumulators cleared */ |
142 | static inline | 142 | static inline |
143 | void vect_add(ogg_int32_t *x, ogg_int32_t *y, int n) | 143 | void vect_add_right_left(ogg_int32_t *x, const ogg_int32_t *y, int n) |
144 | { | 144 | { |
145 | /* align to 16 bytes */ | 145 | /* align to 16 bytes */ |
146 | while(n>0 && (int)x&16) { | 146 | while(n>0 && (int)x&15) { |
147 | *x++ += *y++; | 147 | *x++ += *y++; |
148 | n--; | 148 | n--; |
149 | } | 149 | } |
@@ -172,12 +172,20 @@ void vect_add(ogg_int32_t *x, ogg_int32_t *y, int n) | |||
172 | n--; | 172 | n--; |
173 | } | 173 | } |
174 | } | 174 | } |
175 | static inline | ||
176 | void vect_add_left_right(ogg_int32_t *x, const ogg_int32_t *y, int n) | ||
177 | { | ||
178 | /* coldfire asm has symmetrical versions of vect_add_right_left | ||
179 | and vect_add_left_right (since symmetrical versions of | ||
180 | vect_mult_fw and vect_mult_bw i.e. both use MULT31) */ | ||
181 | vect_add_right_left(x, y, n ); | ||
182 | } | ||
175 | 183 | ||
176 | static inline | 184 | static inline |
177 | void vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n) | 185 | void vect_copy(ogg_int32_t *x, const ogg_int32_t *y, int n) |
178 | { | 186 | { |
179 | /* align to 16 bytes */ | 187 | /* align to 16 bytes */ |
180 | while(n>0 && (int)x&16) { | 188 | while(n>0 && (int)x&15) { |
181 | *x++ = *y++; | 189 | *x++ = *y++; |
182 | n--; | 190 | n--; |
183 | } | 191 | } |
@@ -199,12 +207,11 @@ void vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n) | |||
199 | } | 207 | } |
200 | } | 208 | } |
201 | 209 | ||
202 | |||
203 | static inline | 210 | static inline |
204 | void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n) | 211 | void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n) |
205 | { | 212 | { |
206 | /* ensure data is aligned to 16-bytes */ | 213 | /* ensure data is aligned to 16-bytes */ |
207 | while(n>0 && (int)data%16) { | 214 | while(n>0 && (int)data&15) { |
208 | *data = MULT31(*data, *window); | 215 | *data = MULT31(*data, *window); |
209 | data++; | 216 | data++; |
210 | window++; | 217 | window++; |
@@ -258,7 +265,7 @@ static inline | |||
258 | void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n) | 265 | void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n) |
259 | { | 266 | { |
260 | /* ensure at least data is aligned to 16-bytes */ | 267 | /* ensure at least data is aligned to 16-bytes */ |
261 | while(n>0 && (int)data%16) { | 268 | while(n>0 && (int)data&15) { |
262 | *data = MULT31(*data, *window); | 269 | *data = MULT31(*data, *window); |
263 | data++; | 270 | data++; |
264 | window--; | 271 | window--; |
diff --git a/apps/codecs/libtremor/block.c b/apps/codecs/libtremor/block.c index e609fc44f7..eb087e12a9 100644 --- a/apps/codecs/libtremor/block.c +++ b/apps/codecs/libtremor/block.c | |||
@@ -36,6 +36,13 @@ static int ilog(unsigned int v){ | |||
36 | return(ret); | 36 | return(ret); |
37 | } | 37 | } |
38 | 38 | ||
39 | static ogg_int32_t* _pcmp [CHANNELS] IBSS_ATTR; | ||
40 | static ogg_int32_t* _pcmbp[CHANNELS] IBSS_ATTR; | ||
41 | static ogg_int32_t* _pcmret[CHANNELS] IBSS_ATTR; | ||
42 | /* if true, we have both pcm buffers in iram and we use a bufferflip. | ||
43 | if false, we have one in iram and one in mem, and we use a memcpy */ | ||
44 | static bool iram_pcm_doublebuffer IBSS_ATTR; | ||
45 | |||
39 | /* pcm accumulator examples (not exhaustive): | 46 | /* pcm accumulator examples (not exhaustive): |
40 | 47 | ||
41 | <-------------- lW ----------------> | 48 | <-------------- lW ----------------> |
@@ -145,18 +152,44 @@ int vorbis_block_clear(vorbis_block *vb){ | |||
145 | 152 | ||
146 | static int _vds_init(vorbis_dsp_state *v,vorbis_info *vi){ | 153 | static int _vds_init(vorbis_dsp_state *v,vorbis_info *vi){ |
147 | int i; | 154 | int i; |
155 | long b_size[2]; | ||
156 | LOOKUP_TNC *iramposw; | ||
157 | ogg_int32_t *internal_pcm=NULL; | ||
158 | |||
148 | codec_setup_info *ci=(codec_setup_info *)vi->codec_setup; | 159 | codec_setup_info *ci=(codec_setup_info *)vi->codec_setup; |
149 | private_state *b=NULL; | 160 | private_state *b=NULL; |
150 | 161 | ||
151 | memset(v,0,sizeof(*v)); | 162 | memset(v,0,sizeof(*v)); |
163 | v->reset_pcmb=true; | ||
152 | b=(private_state *)(v->backend_state=_ogg_calloc(1,sizeof(*b))); | 164 | b=(private_state *)(v->backend_state=_ogg_calloc(1,sizeof(*b))); |
153 | 165 | ||
154 | v->vi=vi; | 166 | v->vi=vi; |
155 | b->modebits=ilog(ci->modes); | 167 | b->modebits=ilog(ci->modes); |
156 | 168 | ||
169 | /* allocate IRAM buffer for the PCM data generated by synthesis */ | ||
170 | iram_malloc_init(); | ||
171 | v->iram_pcm=(ogg_int32_t *)iram_malloc(vi->channels*ci->blocksizes[1]*sizeof(ogg_int32_t)); | ||
172 | if(v->iram_pcm != NULL) v->iram_pcm_storage=ci->blocksizes[1]; | ||
173 | else v->iram_pcm_storage=0; | ||
174 | |||
175 | v->centerW=0; | ||
176 | |||
157 | /* Vorbis I uses only window type 0 */ | 177 | /* Vorbis I uses only window type 0 */ |
158 | b->window[0]=_vorbis_window(0,ci->blocksizes[0]/2); | 178 | b_size[0]=ci->blocksizes[0]/2; |
159 | b->window[1]=_vorbis_window(0,ci->blocksizes[1]/2); | 179 | b_size[1]=ci->blocksizes[1]/2; |
180 | b->window[0]=_vorbis_window(0,b_size[0]); | ||
181 | b->window[1]=_vorbis_window(0,b_size[1]); | ||
182 | |||
183 | /* allocate IRAM buffer for window tables too, if sufficient iram available */ | ||
184 | /* give preference to the larger window over the smaller window | ||
185 | (on the assumption that both windows are equally likely used) */ | ||
186 | for(i=1; i>=0; i--){ | ||
187 | iramposw=(LOOKUP_TNC *)iram_malloc(b_size[i]*sizeof(LOOKUP_TNC)); | ||
188 | if(iramposw!=NULL) { | ||
189 | memcpy(iramposw, b->window[i], b_size[i]*sizeof(LOOKUP_TNC)); | ||
190 | b->window[i]=iramposw; | ||
191 | } | ||
192 | } | ||
160 | 193 | ||
161 | /* finish the codebooks */ | 194 | /* finish the codebooks */ |
162 | if(!ci->fullbooks){ | 195 | if(!ci->fullbooks){ |
@@ -169,14 +202,34 @@ static int _vds_init(vorbis_dsp_state *v,vorbis_info *vi){ | |||
169 | } | 202 | } |
170 | } | 203 | } |
171 | 204 | ||
205 | /* if we can get away with it, put a double buffer into IRAM too, so that | ||
206 | overlap-add runs iram-to-iram and we avoid needing to memcpy */ | ||
172 | v->pcm_storage=ci->blocksizes[1]; | 207 | v->pcm_storage=ci->blocksizes[1]; |
173 | v->pcm=(ogg_int32_t **)_ogg_malloc(vi->channels*sizeof(*v->pcm)); | 208 | v->pcm=_pcmp; |
174 | v->pcmb=(ogg_int32_t **)_ogg_malloc(vi->channels*sizeof(*v->pcmb)); | 209 | v->pcmret=_pcmret; |
175 | v->pcmret=(ogg_int32_t **)_ogg_malloc(vi->channels*sizeof(*v->pcmret)); | 210 | v->pcmb=_pcmbp; |
176 | 211 | ||
177 | for(i=0;i<vi->channels;i++) | 212 | _pcmp[0]=NULL; |
178 | v->pcm[i]=(ogg_int32_t *)_ogg_calloc(v->pcm_storage,sizeof(*v->pcm[i])); | 213 | _pcmp[1]=NULL; |
179 | 214 | _pcmbp[0]=NULL; | |
215 | _pcmbp[1]=NULL; | ||
216 | |||
217 | if(NULL != (internal_pcm = iram_malloc(vi->channels*v->pcm_storage*sizeof(ogg_int32_t)))) | ||
218 | { | ||
219 | /* one-time initialisation at codec start or on switch from | ||
220 | blocksizes greater than IRAM_PCM_END to sizes that fit */ | ||
221 | for(i=0;i<vi->channels;i++) | ||
222 | v->pcm[i]=&internal_pcm[i*v->pcm_storage]; | ||
223 | iram_pcm_doublebuffer = true; | ||
224 | } | ||
225 | else | ||
226 | { | ||
227 | /* one-time initialisation at codec start or on switch from | ||
228 | blocksizes that fit in IRAM_PCM_END to those that don't */ | ||
229 | for(i=0;i<vi->channels;i++) | ||
230 | v->pcm[i]=(ogg_int32_t *)_ogg_calloc(v->pcm_storage,sizeof(*v->pcm[i])); | ||
231 | iram_pcm_doublebuffer = false; | ||
232 | } | ||
180 | 233 | ||
181 | /* all 1 (large block) or 0 (small block) */ | 234 | /* all 1 (large block) or 0 (small block) */ |
182 | /* explicitly set for the sake of clarity */ | 235 | /* explicitly set for the sake of clarity */ |
@@ -203,13 +256,17 @@ int vorbis_synthesis_restart(vorbis_dsp_state *v){ | |||
203 | ci=vi->codec_setup; | 256 | ci=vi->codec_setup; |
204 | if(!ci)return -1; | 257 | if(!ci)return -1; |
205 | 258 | ||
206 | v->centerW=ci->blocksizes[1]/2; | 259 | v->centerW=0; |
207 | v->pcm_current=v->centerW; | 260 | v->pcm_current=0; |
208 | 261 | ||
209 | v->pcm_returned=-1; | 262 | v->pcm_returned=-1; |
210 | v->granulepos=-1; | 263 | v->granulepos=-1; |
211 | v->sequence=-1; | 264 | v->sequence=-1; |
212 | ((private_state *)(v->backend_state))->sample_count=-1; | 265 | ((private_state *)(v->backend_state))->sample_count=-1; |
266 | |||
267 | /* indicate to synthesis code that buffer pointers no longer valid | ||
268 | (if we're using double pcm buffer) and will need to reset them */ | ||
269 | v->reset_pcmb = true; | ||
213 | 270 | ||
214 | return(0); | 271 | return(0); |
215 | } | 272 | } |
@@ -228,11 +285,12 @@ void vorbis_dsp_clear(vorbis_dsp_state *v){ | |||
228 | codec_setup_info *ci=(codec_setup_info *)(vi?vi->codec_setup:NULL); | 285 | codec_setup_info *ci=(codec_setup_info *)(vi?vi->codec_setup:NULL); |
229 | private_state *b=(private_state *)v->backend_state; | 286 | private_state *b=(private_state *)v->backend_state; |
230 | 287 | ||
231 | if(v->pcm){ | 288 | if(!iram_pcm_doublebuffer) |
232 | for(i=0;i<vi->channels;i++) | 289 | { |
233 | if(v->pcm[i])_ogg_free(v->pcm[i]); | 290 | if(v->pcm){ |
234 | _ogg_free(v->pcm); | 291 | for(i=0;i<vi->channels;i++) |
235 | if(v->pcmret)_ogg_free(v->pcmret); | 292 | if(v->pcm[i])_ogg_free(v->pcm[i]); |
293 | } | ||
236 | } | 294 | } |
237 | 295 | ||
238 | /* free mode lookups; these are actually vorbis_look_mapping structs */ | 296 | /* free mode lookups; these are actually vorbis_look_mapping structs */ |
@@ -258,7 +316,7 @@ void vorbis_dsp_clear(vorbis_dsp_state *v){ | |||
258 | calling (as it relies on the previous block). */ | 316 | calling (as it relies on the previous block). */ |
259 | 317 | ||
260 | int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb) | 318 | int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb) |
261 | ICODE_ATTR_TREMOR_NOT_MDCT; | 319 | ICODE_ATTR; |
262 | int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){ | 320 | int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){ |
263 | vorbis_info *vi=v->vi; | 321 | vorbis_info *vi=v->vi; |
264 | codec_setup_info *ci=(codec_setup_info *)vi->codec_setup; | 322 | codec_setup_info *ci=(codec_setup_info *)vi->codec_setup; |
@@ -278,85 +336,91 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){ | |||
278 | } | 336 | } |
279 | 337 | ||
280 | v->sequence=vb->sequence; | 338 | v->sequence=vb->sequence; |
339 | int n=ci->blocksizes[v->W]/2; | ||
340 | int ln=ci->blocksizes[v->lW]/2; | ||
281 | 341 | ||
282 | if(vb->pcm){ /* no pcm to process if vorbis_synthesis_trackonly | 342 | if(LIKELY(vb->pcm)){ /* no pcm to process if vorbis_synthesis_trackonly |
283 | was called on block */ | 343 | was called on block */ |
284 | int n=ci->blocksizes[v->W]/2; | 344 | int prevCenter; |
285 | int n0=ci->blocksizes[0]/2; | 345 | int n0=ci->blocksizes[0]/2; |
286 | int n1=ci->blocksizes[1]/2; | 346 | int n1=ci->blocksizes[1]/2; |
287 | 347 | ||
288 | int thisCenter; | 348 | if(iram_pcm_doublebuffer) |
289 | int prevCenter; | 349 | { |
290 | 350 | prevCenter = ln; | |
291 | if(v->centerW){ | 351 | } |
292 | thisCenter=n1; | 352 | else |
293 | prevCenter=0; | 353 | { |
294 | }else{ | 354 | prevCenter = v->centerW; |
295 | thisCenter=0; | 355 | v->centerW = n1 - v->centerW; |
296 | prevCenter=n1; | ||
297 | } | 356 | } |
298 | |||
299 | /* v->pcm is now used like a two-stage double buffer. We don't want | ||
300 | to have to constantly shift *or* adjust memory usage. Don't | ||
301 | accept a new block until the old is shifted out */ | ||
302 | 357 | ||
303 | /* overlap/add PCM */ | 358 | /* overlap/add PCM */ |
304 | 359 | /* nb nothing to overlap with on first block so don't bother */ | |
305 | for(j=0;j<vi->channels;j++){ | 360 | if(LIKELY(v->pcm_returned!=-1)) |
306 | /* the overlap/add section */ | 361 | { |
307 | if(v->lW){ | 362 | for(j=0;j<vi->channels;j++) |
308 | if(v->W){ | 363 | { |
309 | /* large/large */ | 364 | ogg_int32_t *pcm=v->pcm[j]+prevCenter; |
310 | ogg_int32_t *pcm=v->pcm[j]+prevCenter; | 365 | ogg_int32_t *p=vb->pcm[j]; |
311 | ogg_int32_t *p=vb->pcm[j]; | 366 | |
312 | vect_add(p, pcm, n1); | 367 | /* the overlap/add section */ |
313 | v->pcmb[j]=p; | 368 | if(v->lW == v->W) |
314 | }else{ | 369 | { |
315 | /* large/small */ | 370 | /* large/large or small/small */ |
316 | ogg_int32_t *pcm=v->pcm[j]+prevCenter+n1/2-n0/2; | 371 | vect_add_right_left(pcm,p,n); |
317 | ogg_int32_t *p=vb->pcm[j]; | 372 | v->pcmb[j]=pcm; |
318 | vect_add(pcm, p, n0); | ||
319 | v->pcmb[j]=v->pcm[j]+prevCenter; | ||
320 | } | 373 | } |
321 | }else{ | 374 | else if (!v->W) |
322 | if(v->W){ | 375 | { |
323 | /* small/large */ | 376 | /* large/small */ |
324 | ogg_int32_t *pcm=v->pcm[j]+prevCenter; | 377 | vect_add_right_left(pcm + (n1-n0)/2, p, n0); |
325 | ogg_int32_t *p=vb->pcm[j]+n1/2-n0/2; | 378 | v->pcmb[j]=pcm; |
326 | vect_add(p, pcm, n0); | 379 | } |
327 | v->pcmb[j]=p; | 380 | else |
328 | }else{ | 381 | { |
329 | /* small/small */ | 382 | /* small/large */ |
330 | ogg_int32_t *pcm=v->pcm[j]+prevCenter; | 383 | p += (n1-n0)/2; |
331 | ogg_int32_t *p=vb->pcm[j]; | 384 | vect_add_left_right(p,pcm,n0); |
332 | vect_add(p, pcm, n0); | 385 | v->pcmb[j]=p; |
333 | v->pcmb[j]=p; | ||
334 | } | 386 | } |
335 | } | 387 | } |
336 | 388 | } | |
337 | /* the copy section */ | 389 | |
390 | /* the copy section */ | ||
391 | if(iram_pcm_doublebuffer) | ||
392 | { | ||
393 | /* just flip the pointers over as we have a double buffer in iram */ | ||
394 | ogg_int32_t *p; | ||
395 | p=v->pcm[0]; | ||
396 | v->pcm[0]=vb->pcm[0]; | ||
397 | vb->pcm[0] = p; | ||
398 | p=v->pcm[1]; | ||
399 | v->pcm[1]=vb->pcm[1]; | ||
400 | vb->pcm[1] = p; | ||
401 | } | ||
402 | else | ||
403 | { | ||
404 | for(j=0;j<vi->channels;j++) | ||
338 | { | 405 | { |
339 | ogg_int32_t *pcm=v->pcm[j]+thisCenter; | 406 | /* at best only vb->pcm is in iram, and that's where we do the |
340 | ogg_int32_t *p=vb->pcm[j]+n; | 407 | synthesis, so we copy out the right-hand subframe of last |
341 | vect_copy(pcm, p, n); | 408 | synthesis into (noniram) local buffer so we can still do |
409 | synth in iram */ | ||
410 | vect_copy(v->pcm[j]+v->centerW, vb->pcm[j]+n, n); | ||
342 | } | 411 | } |
343 | } | 412 | } |
344 | 413 | ||
345 | if(v->centerW) | ||
346 | v->centerW=0; | ||
347 | else | ||
348 | v->centerW=n1; | ||
349 | |||
350 | /* deal with initial packet state; we do this using the explicit | 414 | /* deal with initial packet state; we do this using the explicit |
351 | pcm_returned==-1 flag otherwise we're sensitive to first block | 415 | pcm_returned==-1 flag otherwise we're sensitive to first block |
352 | being short or long */ | 416 | being short or long */ |
353 | 417 | ||
354 | if(v->pcm_returned==-1){ | 418 | if(v->pcm_returned==-1){ |
355 | v->pcm_returned=thisCenter; | 419 | v->pcm_returned=0; |
356 | v->pcm_current=thisCenter; | 420 | v->pcm_current=0; |
357 | }else{ | 421 | }else{ |
358 | v->pcm_returned=0; | 422 | v->pcm_returned=0; |
359 | v->pcm_current=ci->blocksizes[v->lW]/4+ci->blocksizes[v->W]/4; | 423 | v->pcm_current=(n+ln)/2; |
360 | } | 424 | } |
361 | 425 | ||
362 | } | 426 | } |
@@ -375,7 +439,7 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){ | |||
375 | if(b->sample_count==-1){ | 439 | if(b->sample_count==-1){ |
376 | b->sample_count=0; | 440 | b->sample_count=0; |
377 | }else{ | 441 | }else{ |
378 | b->sample_count+=ci->blocksizes[v->lW]/4+ci->blocksizes[v->W]/4; | 442 | b->sample_count+=(n+ln)/2; |
379 | } | 443 | } |
380 | 444 | ||
381 | if(v->granulepos==-1){ | 445 | if(v->granulepos==-1){ |
@@ -406,7 +470,7 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){ | |||
406 | 470 | ||
407 | } | 471 | } |
408 | }else{ | 472 | }else{ |
409 | v->granulepos+=ci->blocksizes[v->lW]/4+ci->blocksizes[v->W]/4; | 473 | v->granulepos+=(n+ln)/2; |
410 | if(vb->granulepos!=-1 && v->granulepos!=vb->granulepos){ | 474 | if(vb->granulepos!=-1 && v->granulepos!=vb->granulepos){ |
411 | 475 | ||
412 | if(v->granulepos>vb->granulepos){ | 476 | if(v->granulepos>vb->granulepos){ |
diff --git a/apps/codecs/libtremor/config-tremor.h b/apps/codecs/libtremor/config-tremor.h index 7cfcb7e35d..d360b5fd73 100644 --- a/apps/codecs/libtremor/config-tremor.h +++ b/apps/codecs/libtremor/config-tremor.h | |||
@@ -13,10 +13,6 @@ | |||
13 | #define BIG_ENDIAN 0 | 13 | #define BIG_ENDIAN 0 |
14 | #endif | 14 | #endif |
15 | 15 | ||
16 | #ifndef ICONST_ATTR_TREMOR_WINDOW | ||
17 | #define ICONST_ATTR_TREMOR_WINDOW ICONST_ATTR | ||
18 | #endif | ||
19 | |||
20 | #ifndef ICODE_ATTR_TREMOR_MDCT | 16 | #ifndef ICODE_ATTR_TREMOR_MDCT |
21 | #define ICODE_ATTR_TREMOR_MDCT ICODE_ATTR | 17 | #define ICODE_ATTR_TREMOR_MDCT ICODE_ATTR |
22 | #endif | 18 | #endif |
@@ -25,4 +21,29 @@ | |||
25 | #define ICODE_ATTR_TREMOR_NOT_MDCT ICODE_ATTR | 21 | #define ICODE_ATTR_TREMOR_NOT_MDCT ICODE_ATTR |
26 | #endif | 22 | #endif |
27 | 23 | ||
24 | /* Define CPU of large IRAM (MCF5250) */ | ||
25 | #if (CONFIG_CPU == MCF5250) | ||
26 | /* PCM_BUFFER : 32768 Byte (4096*2*4) * | ||
27 | * WINDOW_LOOKUP : 4608 Byte (128*4 + 1024*4) * | ||
28 | * TOTAL : 37376 */ | ||
29 | #define IRAM_IBSS_SIZE 37376 | ||
30 | |||
31 | /* Define CPU of large IRAM (PP5022/5024) */ | ||
32 | #elif (CONFIG_CPU == PP5022) || (CONFIG_CPU == PP5024) | ||
33 | /* PCM_BUFFER : 32768 byte (4096*2*4 or 2048*4*4) * | ||
34 | * WINDOW_LOOKUP : 9216 Byte (256*4 + 2048*4) * | ||
35 | * TOTAL : 41984 */ | ||
36 | #define IRAM_IBSS_SIZE 41984 | ||
37 | |||
38 | /* Define CPU of Normal IRAM (96KB) (and SIM also) */ | ||
39 | #else | ||
40 | /* PCM_BUFFER : 16384 Byte (2048*2*4) * | ||
41 | * WINDOW_LOOKUP : 4608 Byte (128*4 + 1024*4) * | ||
42 | * TOTAL : 20992 */ | ||
43 | #define IRAM_IBSS_SIZE 20992 | ||
44 | #endif | ||
45 | |||
46 | /* max 2 channels */ | ||
47 | #define CHANNELS 2 | ||
48 | |||
28 | // #define _LOW_ACCURACY_ | 49 | // #define _LOW_ACCURACY_ |
diff --git a/apps/codecs/libtremor/ivorbiscodec.h b/apps/codecs/libtremor/ivorbiscodec.h index 2574a11f2a..a9526d56a6 100644 --- a/apps/codecs/libtremor/ivorbiscodec.h +++ b/apps/codecs/libtremor/ivorbiscodec.h | |||
@@ -76,6 +76,10 @@ typedef struct vorbis_dsp_state{ | |||
76 | ogg_int64_t sequence; | 76 | ogg_int64_t sequence; |
77 | 77 | ||
78 | void *backend_state; | 78 | void *backend_state; |
79 | |||
80 | ogg_int32_t *iram_pcm; /* IRAM PCM buffer */ | ||
81 | int iram_pcm_storage; /* size of IRAM PCM buffer */ | ||
82 | bool reset_pcmb; | ||
79 | } vorbis_dsp_state; | 83 | } vorbis_dsp_state; |
80 | 84 | ||
81 | typedef struct vorbis_block{ | 85 | typedef struct vorbis_block{ |
diff --git a/apps/codecs/libtremor/mapping0.c b/apps/codecs/libtremor/mapping0.c index 2bb7a46d79..8b2343c56f 100644 --- a/apps/codecs/libtremor/mapping0.c +++ b/apps/codecs/libtremor/mapping0.c | |||
@@ -182,8 +182,6 @@ static vorbis_info_mapping *mapping0_unpack(vorbis_info *vi,oggpack_buffer *opb) | |||
182 | 182 | ||
183 | static int seq = 0; | 183 | static int seq = 0; |
184 | 184 | ||
185 | #define CHANNELS 2 /* max 2 channels on the ihp-1xx (stereo) */ | ||
186 | |||
187 | static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){ | 185 | static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){ |
188 | vorbis_dsp_state *vd=vb->vd; | 186 | vorbis_dsp_state *vd=vb->vd; |
189 | vorbis_info *vi=vd->vi; | 187 | vorbis_info *vi=vd->vi; |
diff --git a/apps/codecs/libtremor/misc.h b/apps/codecs/libtremor/misc.h index e94236c2a8..59ce6dbb74 100644 --- a/apps/codecs/libtremor/misc.h +++ b/apps/codecs/libtremor/misc.h | |||
@@ -155,8 +155,11 @@ static inline void XNPROD31(ogg_int32_t a, ogg_int32_t b, | |||
155 | #ifndef _V_VECT_OPS | 155 | #ifndef _V_VECT_OPS |
156 | #define _V_VECT_OPS | 156 | #define _V_VECT_OPS |
157 | 157 | ||
158 | /* generic misc.h has symmetrical versions of vect_add_right_left | ||
159 | and vect_add_left_right (since symmetrical versions of | ||
160 | vect_mult_fw and vect_mult_bw i.e. both use MULT31) */ | ||
158 | static inline | 161 | static inline |
159 | void vect_add(ogg_int32_t *x, ogg_int32_t *y, int n) | 162 | void vect_add_right_left(ogg_int32_t *x, const ogg_int32_t *y, int n) |
160 | { | 163 | { |
161 | while (n>0) { | 164 | while (n>0) { |
162 | *x++ += *y++; | 165 | *x++ += *y++; |
@@ -164,13 +167,10 @@ void vect_add(ogg_int32_t *x, ogg_int32_t *y, int n) | |||
164 | } | 167 | } |
165 | } | 168 | } |
166 | 169 | ||
167 | static inline | 170 | static inline |
168 | void vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n) | 171 | void vect_add_left_right(ogg_int32_t *x, const ogg_int32_t *y, int n) |
169 | { | 172 | { |
170 | while (n>0) { | 173 | vect_add_right_left(x,y,n); |
171 | *x++ = *y++; | ||
172 | n--; | ||
173 | } | ||
174 | } | 174 | } |
175 | 175 | ||
176 | static inline | 176 | static inline |
@@ -194,6 +194,12 @@ void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n) | |||
194 | n--; | 194 | n--; |
195 | } | 195 | } |
196 | } | 196 | } |
197 | |||
198 | /* generic memcpy is probably optimal */ | ||
199 | static inline void vect_copy(ogg_int32_t *x, const ogg_int32_t *y, int n) | ||
200 | { | ||
201 | memcpy(x,y,n*sizeof(ogg_int32_t)); | ||
202 | } | ||
197 | #endif | 203 | #endif |
198 | 204 | ||
199 | #endif | 205 | #endif |
diff --git a/apps/codecs/libtremor/oggmalloc.c b/apps/codecs/libtremor/oggmalloc.c index 4aa2760629..6da7cfcedc 100644 --- a/apps/codecs/libtremor/oggmalloc.c +++ b/apps/codecs/libtremor/oggmalloc.c | |||
@@ -81,3 +81,27 @@ void ogg_tmpmalloc_free(long pos) | |||
81 | { | 81 | { |
82 | tmp_ptr = pos; | 82 | tmp_ptr = pos; |
83 | } | 83 | } |
84 | |||
85 | /* Allocate IRAM buffer */ | ||
86 | static unsigned char iram_buff[IRAM_IBSS_SIZE] IBSS_ATTR __attribute__ ((aligned (16))); | ||
87 | static size_t iram_remain; | ||
88 | |||
89 | void iram_malloc_init(void){ | ||
90 | iram_remain=IRAM_IBSS_SIZE; | ||
91 | } | ||
92 | |||
93 | void *iram_malloc(size_t size){ | ||
94 | void* x; | ||
95 | |||
96 | /* always ensure 16-byte aligned */ | ||
97 | if(size&0x0f) | ||
98 | size=(size-(size&0x0f))+16; | ||
99 | |||
100 | if(size>iram_remain) | ||
101 | return NULL; | ||
102 | |||
103 | x = &iram_buff[IRAM_IBSS_SIZE-iram_remain]; | ||
104 | iram_remain-=size; | ||
105 | |||
106 | return x; | ||
107 | } | ||
diff --git a/apps/codecs/libtremor/os_types.h b/apps/codecs/libtremor/os_types.h index 5738ef4911..4c7d17ef3a 100644 --- a/apps/codecs/libtremor/os_types.h +++ b/apps/codecs/libtremor/os_types.h | |||
@@ -25,9 +25,11 @@ | |||
25 | #ifdef _LOW_ACCURACY_ | 25 | #ifdef _LOW_ACCURACY_ |
26 | # define X(n) (((((n)>>22)+1)>>1) - ((((n)>>22)+1)>>9)) | 26 | # define X(n) (((((n)>>22)+1)>>1) - ((((n)>>22)+1)>>9)) |
27 | # define LOOKUP_T const unsigned char | 27 | # define LOOKUP_T const unsigned char |
28 | # define LOOKUP_TNC unsigned char | ||
28 | #else | 29 | #else |
29 | # define X(n) (n) | 30 | # define X(n) (n) |
30 | # define LOOKUP_T const ogg_int32_t | 31 | # define LOOKUP_T const ogg_int32_t |
32 | # define LOOKUP_TNC ogg_int32_t | ||
31 | #endif | 33 | #endif |
32 | 34 | ||
33 | /* make it easy on the folks that want to compile the libs with a | 35 | /* make it easy on the folks that want to compile the libs with a |
@@ -46,6 +48,8 @@ void *ogg_tmpcalloc(size_t nmemb, size_t size); | |||
46 | void *ogg_realloc(void *ptr, size_t size); | 48 | void *ogg_realloc(void *ptr, size_t size); |
47 | long ogg_tmpmalloc_pos(void); | 49 | long ogg_tmpmalloc_pos(void); |
48 | void ogg_tmpmalloc_free(long pos); | 50 | void ogg_tmpmalloc_free(long pos); |
51 | void iram_malloc_init(void); | ||
52 | void *iram_malloc(size_t size); | ||
49 | 53 | ||
50 | typedef short ogg_int16_t; | 54 | typedef short ogg_int16_t; |
51 | typedef int ogg_int32_t; | 55 | typedef int ogg_int32_t; |
diff --git a/apps/codecs/libtremor/res012.c b/apps/codecs/libtremor/res012.c index 46b782def1..a42660a065 100644 --- a/apps/codecs/libtremor/res012.c +++ b/apps/codecs/libtremor/res012.c | |||
@@ -172,8 +172,6 @@ static vorbis_look_residue *res0_look(vorbis_dsp_state *vd,vorbis_info_mode *vm, | |||
172 | return(look); | 172 | return(look); |
173 | } | 173 | } |
174 | 174 | ||
175 | #define CHANNELS 2 | ||
176 | |||
177 | /* a truncated packet here just means 'stop working'; it's not an error */ | 175 | /* a truncated packet here just means 'stop working'; it's not an error */ |
178 | static int _01inverse(vorbis_block *vb,vorbis_look_residue *vl, | 176 | static int _01inverse(vorbis_block *vb,vorbis_look_residue *vl, |
179 | ogg_int32_t **in,int ch, | 177 | ogg_int32_t **in,int ch, |
diff --git a/apps/codecs/libtremor/synthesis.c b/apps/codecs/libtremor/synthesis.c index cef240e796..b1c5eeccef 100644 --- a/apps/codecs/libtremor/synthesis.c +++ b/apps/codecs/libtremor/synthesis.c | |||
@@ -25,15 +25,7 @@ | |||
25 | #include "os.h" | 25 | #include "os.h" |
26 | 26 | ||
27 | 27 | ||
28 | /* IRAM buffer keep the block pcm data; only for windows size upto 2048 | ||
29 | for space restrictions. | ||
30 | libVorbis 1.1 Oggenc doesn't use larger windows anyway. */ | ||
31 | /* max 2 channels on the ihp-1xx (stereo), 2048 samples (2*2048*4=16Kb) */ | ||
32 | #define IRAM_PCM_END 2048 | ||
33 | #define CHANNELS 2 | ||
34 | |||
35 | static ogg_int32_t *ipcm_vect[CHANNELS] IBSS_ATTR; | 28 | static ogg_int32_t *ipcm_vect[CHANNELS] IBSS_ATTR; |
36 | static ogg_int32_t ipcm_buff[CHANNELS*IRAM_PCM_END] IBSS_ATTR LINE_ATTR; | ||
37 | 29 | ||
38 | int vorbis_synthesis(vorbis_block *vb,ogg_packet *op,int decodep) | 30 | int vorbis_synthesis(vorbis_block *vb,ogg_packet *op,int decodep) |
39 | ICODE_ATTR_TREMOR_NOT_MDCT; | 31 | ICODE_ATTR_TREMOR_NOT_MDCT; |
@@ -76,23 +68,33 @@ int vorbis_synthesis(vorbis_block *vb,ogg_packet *op,int decodep){ | |||
76 | vb->eofflag=op->e_o_s; | 68 | vb->eofflag=op->e_o_s; |
77 | 69 | ||
78 | if(decodep && vi->channels<=CHANNELS){ | 70 | if(decodep && vi->channels<=CHANNELS){ |
71 | vb->pcm = ipcm_vect; | ||
72 | |||
79 | /* alloc pcm passback storage */ | 73 | /* alloc pcm passback storage */ |
80 | vb->pcmend=ci->blocksizes[vb->W]; | 74 | vb->pcmend=ci->blocksizes[vb->W]; |
81 | if (vb->pcmend<=IRAM_PCM_END) { | 75 | if (vd->iram_pcm_storage >= vb->pcmend) { |
82 | /* use statically allocated iram buffer */ | 76 | /* use statically allocated iram buffer */ |
83 | vb->pcm = ipcm_vect; | 77 | if(vd->reset_pcmb || vb->pcm[0]==NULL) |
84 | for(i=0; i<CHANNELS; i++) | 78 | { |
85 | vb->pcm[i] = &ipcm_buff[i*IRAM_PCM_END]; | 79 | /* one-time initialisation at codec start |
80 | NOT for every block synthesis start | ||
81 | allows us to flip between buffers once initialised | ||
82 | by simply flipping pointers */ | ||
83 | for(i=0; i<vi->channels; i++) | ||
84 | vb->pcm[i] = &vd->iram_pcm[i*vd->iram_pcm_storage]; | ||
85 | } | ||
86 | } else { | 86 | } else { |
87 | /* dynamic allocation (slower) */ | 87 | if(vd->reset_pcmb || vb->pcm[0]==NULL) |
88 | vb->pcm=(ogg_int32_t **)_vorbis_block_alloc(vb,sizeof(*vb->pcm)*vi->channels); | 88 | { |
89 | for(i=0;i<vi->channels;i++) | 89 | /* dynamic allocation (slower) */ |
90 | vb->pcm[i]=(ogg_int32_t *)_vorbis_block_alloc(vb,vb->pcmend*sizeof(*vb->pcm[i])); | 90 | for(i=0;i<vi->channels;i++) |
91 | vb->pcm[i]=(ogg_int32_t *)_vorbis_block_alloc(vb,vb->pcmend*sizeof(*vb->pcm[i])); | ||
92 | } | ||
91 | } | 93 | } |
94 | vd->reset_pcmb = false; | ||
92 | 95 | ||
93 | /* unpack_header enforces range checking */ | 96 | /* unpack_header enforces range checking */ |
94 | type=ci->map_type[ci->mode_param[mode]->mapping]; | 97 | type=ci->map_type[ci->mode_param[mode]->mapping]; |
95 | |||
96 | return(_mapping_P[type]->inverse(vb,b->mode[mode])); | 98 | return(_mapping_P[type]->inverse(vb,b->mode[mode])); |
97 | }else{ | 99 | }else{ |
98 | /* no pcm */ | 100 | /* no pcm */ |
diff --git a/apps/codecs/libtremor/window.c b/apps/codecs/libtremor/window.c index 14d97cf6ac..7b48886939 100644 --- a/apps/codecs/libtremor/window.c +++ b/apps/codecs/libtremor/window.c | |||
@@ -68,11 +68,15 @@ void _vorbis_apply_window(ogg_int32_t *d,const void *window_p[2], | |||
68 | long rightbegin=n/2+n/4-rn/4; | 68 | long rightbegin=n/2+n/4-rn/4; |
69 | long rightend=rightbegin+rn/2; | 69 | long rightend=rightbegin+rn/2; |
70 | 70 | ||
71 | memset((void *)&d[0], 0, sizeof(ogg_int32_t)*leftbegin); | 71 | /* Following memset is not required - we are careful to only overlap/add the |
72 | /* mcf5249_vect_zero(&d[0], leftbegin); */ | 72 | regions that genuinely overlap in the window region, and the portions |
73 | outside that region are not added (so don't need to be zeroed). see block.c | ||
74 | memset((void *)&d[0], 0, sizeof(ogg_int32_t)*leftbegin); */ | ||
75 | |||
73 | vect_mult_fw(&d[leftbegin], &window[lW][0], leftend-leftbegin); | 76 | vect_mult_fw(&d[leftbegin], &window[lW][0], leftend-leftbegin); |
74 | vect_mult_bw(&d[rightbegin], &window[nW][rn/2-1], rightend-rightbegin); | 77 | vect_mult_bw(&d[rightbegin], &window[nW][rn/2-1], rightend-rightbegin); |
75 | memset((void *)&d[rightend], 0, sizeof(ogg_int32_t)*(n-rightend)); | 78 | |
76 | /* mcf5249_vect_zero(&d[rightend], n-rightend); */ | 79 | /* Again - memset not needed |
80 | memset((void *)&d[rightend], 0, sizeof(ogg_int32_t)*(n-rightend)); */ | ||
77 | } | 81 | } |
78 | 82 | ||
diff --git a/apps/codecs/libtremor/window_lookup.h b/apps/codecs/libtremor/window_lookup.h index ccf316e227..5363b81042 100644 --- a/apps/codecs/libtremor/window_lookup.h +++ b/apps/codecs/libtremor/window_lookup.h | |||
@@ -51,7 +51,7 @@ static LOOKUP_T vwin128[64] = { | |||
51 | X(0x7ffdcf39), X(0x7fff6dac), X(0x7fffed01), X(0x7fffffc4), | 51 | X(0x7ffdcf39), X(0x7fff6dac), X(0x7fffed01), X(0x7fffffc4), |
52 | }; | 52 | }; |
53 | 53 | ||
54 | static LOOKUP_T vwin256[128] ICONST_ATTR_TREMOR_WINDOW = { | 54 | static LOOKUP_T vwin256[128] = { |
55 | X(0x0001f018), X(0x00117066), X(0x00306e9e), X(0x005ee5f1), | 55 | X(0x0001f018), X(0x00117066), X(0x00306e9e), X(0x005ee5f1), |
56 | X(0x009ccf26), X(0x00ea208b), X(0x0146cdea), X(0x01b2c87f), | 56 | X(0x009ccf26), X(0x00ea208b), X(0x0146cdea), X(0x01b2c87f), |
57 | X(0x022dfedf), X(0x02b85ced), X(0x0351cbbd), X(0x03fa317f), | 57 | X(0x022dfedf), X(0x02b85ced), X(0x0351cbbd), X(0x03fa317f), |
@@ -284,7 +284,7 @@ static LOOKUP_T vwin1024[512] = { | |||
284 | X(0x7fffffdd), X(0x7ffffff7), X(0x7fffffff), X(0x7fffffff), | 284 | X(0x7fffffdd), X(0x7ffffff7), X(0x7fffffff), X(0x7fffffff), |
285 | }; | 285 | }; |
286 | 286 | ||
287 | static LOOKUP_T vwin2048[1024] ICONST_ATTR_TREMOR_WINDOW = { | 287 | static LOOKUP_T vwin2048[1024] = { |
288 | X(0x000007c0), X(0x000045c4), X(0x0000c1ca), X(0x00017bd3), | 288 | X(0x000007c0), X(0x000045c4), X(0x0000c1ca), X(0x00017bd3), |
289 | X(0x000273de), X(0x0003a9eb), X(0x00051df9), X(0x0006d007), | 289 | X(0x000273de), X(0x0003a9eb), X(0x00051df9), X(0x0006d007), |
290 | X(0x0008c014), X(0x000aee1e), X(0x000d5a25), X(0x00100428), | 290 | X(0x0008c014), X(0x000aee1e), X(0x000d5a25), X(0x00100428), |