summaryrefslogtreecommitdiff
path: root/utils/rk27utils
diff options
context:
space:
mode:
authorMarcin Bukat <marcin.bukat@gmail.com>2013-09-02 12:35:47 +0200
committerMarcin Bukat <marcin.bukat@gmail.com>2013-09-02 12:35:47 +0200
commitf182a11f3362017a6c669871414a9bb448ee050d (patch)
tree5afc1ba1713af14aea513ca9b53d00369e083447 /utils/rk27utils
parentb97cdc8f5efa8b447e1e1398d86eb87c80ed4b22 (diff)
downloadrockbox-f182a11f3362017a6c669871414a9bb448ee050d.tar.gz
rockbox-f182a11f3362017a6c669871414a9bb448ee050d.zip
rk27utils: Add nandextract utility
This quick and dirty utility allows to extract nand bootloader from raw 1st nand block dump. I post it mainly to somewhat document how BCH error correction engine of the rk27xx works. Change-Id: I37ca91add7d372e3576d2722afc946d0f08971a9
Diffstat (limited to 'utils/rk27utils')
-rw-r--r--utils/rk27utils/README34
-rw-r--r--utils/rk27utils/nandextract/Makefile7
-rw-r--r--utils/rk27utils/nandextract/libbch.c1393
-rw-r--r--utils/rk27utils/nandextract/libbch.h113
-rw-r--r--utils/rk27utils/nandextract/nandextract.c235
5 files changed, 1782 insertions, 0 deletions
diff --git a/utils/rk27utils/README b/utils/rk27utils/README
index a43d69a88f..d0343580b8 100644
--- a/utils/rk27utils/README
+++ b/utils/rk27utils/README
@@ -35,3 +35,37 @@ This directory contains tool which sends custom scsi commands to the
35rockchip player. 35rockchip player.
36 36
37You need libusb-1.0 + header files in order to compile this utility. 37You need libusb-1.0 + header files in order to compile this utility.
38
39nandextract
40This directory contains quick and dirty tool which allows to extract
41nand bootloader from raw dump of the first nand block. The main reason
42I post this tool is to somewhat document error correction scheme used by
43rk27xx chip. The tool implements BCH error correction processing with
44help of bch library taken from linux kernel (and slightly modified to
45compile standalone). Error correction is SUPER important as the nands used
46in cheap rk27 players have quite high error rates.
47
48Nand controler in rk27xx chip implements hw BCH error correction engine.
49The documentation is lacking so this info was obtained from RE and
50various other sources.
51The data on the nand is stored in 528 bytes long chunks - 512 bytes
52of actual data followed by 3 bytes of metadata (used by FTL layer to mark
53special sectors) followed by 13 bytes of BCH ECC. BCH algorithm
54uses m=13, t=8 and primitive polynomial 0x25af. Special masking
55is used such as empty sector (with all 0xff) gives all 0xff ECC bytes.
56Quoting e-mail from Ivan Djelic (the author of bch lib in linux):
57To summarize, the steps needed to compute the rk27xx ecc are the following:
581. Reverse bits in each input byte
592. Call encode_bch()
603. Reverse output bits in each computed ecc byte
614. Add a polynomial in order to get only 0xff ecc bytes for a blank page
62For more details you need to read the code.
63
64Another quirk is that rom loader assumes that there are 4 sectors in each
65nand page. This is actually not true for newer nand chips with page size
66bigger then 2k. That means that on newer 4k page chips only first half of
67every page is used in nand bootloader area. This is for compatibility reasons
68most probably.
69
70Finally, every 512 bytes block of data is encoded with rc4 algorithm.
71The key and routine were recovered from rk27xx rom dump by AleMaxx.
diff --git a/utils/rk27utils/nandextract/Makefile b/utils/rk27utils/nandextract/Makefile
new file mode 100644
index 0000000000..096b26e893
--- /dev/null
+++ b/utils/rk27utils/nandextract/Makefile
@@ -0,0 +1,7 @@
1all: nandextract
2
3nandextract: nandextract.c libbch.c
4 gcc -g -std=c99 -o $@ -W -Wall $^
5
6clean:
7 rm -fr nandextract
diff --git a/utils/rk27utils/nandextract/libbch.c b/utils/rk27utils/nandextract/libbch.c
new file mode 100644
index 0000000000..04485f3512
--- /dev/null
+++ b/utils/rk27utils/nandextract/libbch.c
@@ -0,0 +1,1393 @@
1/*
2 * Generic binary BCH encoding/decoding library
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License version 2 as published by
6 * the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 51
15 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
16 *
17 * Copyright © 2011 Parrot S.A.
18 *
19 * Author: Ivan Djelic <ivan.djelic@parrot.com>
20 *
21 * Description:
22 *
23 * This library provides runtime configurable encoding/decoding of binary
24 * Bose-Chaudhuri-Hocquenghem (BCH) codes.
25 *
26 * Call init_bch to get a pointer to a newly allocated bch_control structure for
27 * the given m (Galois field order), t (error correction capability) and
28 * (optional) primitive polynomial parameters.
29 *
30 * Call encode_bch to compute and store ecc parity bytes to a given buffer.
31 * Call decode_bch to detect and locate errors in received data.
32 *
33 * On systems supporting hw BCH features, intermediate results may be provided
34 * to decode_bch in order to skip certain steps. See decode_bch() documentation
35 * for details.
36 *
37 * Option CONFIG_BCH_CONST_PARAMS can be used to force fixed values of
38 * parameters m and t; thus allowing extra compiler optimizations and providing
39 * better (up to 2x) encoding performance. Using this option makes sense when
40 * (m,t) are fixed and known in advance, e.g. when using BCH error correction
41 * on a particular NAND flash device.
42 *
43 * Algorithmic details:
44 *
45 * Encoding is performed by processing 32 input bits in parallel, using 4
46 * remainder lookup tables.
47 *
48 * The final stage of decoding involves the following internal steps:
49 * a. Syndrome computation
50 * b. Error locator polynomial computation using Berlekamp-Massey algorithm
51 * c. Error locator root finding (by far the most expensive step)
52 *
53 * In this implementation, step c is not performed using the usual Chien search.
54 * Instead, an alternative approach described in [1] is used. It consists in
55 * factoring the error locator polynomial using the Berlekamp Trace algorithm
56 * (BTA) down to a certain degree (4), after which ad hoc low-degree polynomial
57 * solving techniques [2] are used. The resulting algorithm, called BTZ, yields
58 * much better performance than Chien search for usual (m,t) values (typically
59 * m >= 13, t < 32, see [1]).
60 *
61 * [1] B. Biswas, V. Herbert. Efficient root finding of polynomials over fields
62 * of characteristic 2, in: Western European Workshop on Research in Cryptology
63 * - WEWoRC 2009, Graz, Austria, LNCS, Springer, July 2009, to appear.
64 * [2] [Zin96] V.A. Zinoviev. On the solution of equations of degree 10 over
65 * finite fields GF(2^q). In Rapport de recherche INRIA no 2829, 1996.
66 */
67
68#include <stdlib.h>
69#include <string.h>
70#include <errno.h>
71#include "libbch.h"
72
73/* glue code */
74#define kmalloc(x,y) malloc(x)
75#define kfree(x) free(x)
76
77static void *kzalloc(size_t size, int flags)
78{
79 (void)flags;
80 void *p = malloc(size);
81
82 if (p)
83 memset(p, 0, size);
84
85 return p;
86}
87
88#define EXPORT_SYMBOL_GPL(x)
89#define MODULE_LICENSE(x)
90#define MODULE_AUTHOR(x)
91#define MODULE_DESCRIPTION(x)
92
93#define DIV_ROUND_UP(n,d) (((n) + (d) - 1)/(d))
94
95#include <arpa/inet.h>
96#define cpu_to_be32(x) htonl(x)
97
98static inline int fls(int x)
99{
100 int r = 32;
101 if (!x)
102 return 0;
103
104 if (!(x & 0xffff0000u)) {
105 x <<= 16;
106 r -= 16;
107 }
108
109 if (!(x & 0xff000000u)) {
110 x <<= 8;
111 r -= 8;
112 }
113
114 if (!(x & 0xf0000000u)) {
115 x <<= 4;
116 r -= 4;
117 }
118
119 if (!(x & 0xc0000000u)) {
120 x <<= 2;
121 r -= 2;
122 }
123
124 if (!(x & 0x80000000u)) {
125 x <<= 1;
126 r -= 1;
127 }
128
129 return r;
130}
131
132#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
133#define GFP_KERNEL 0
134/* end of glue code */
135
136/*
137 * same as encode_bch(), but process input data one byte at a time
138 */
139static void encode_bch_unaligned(struct bch_control *bch,
140 const unsigned char *data, unsigned int len,
141 uint32_t *ecc)
142{
143 int i;
144 const uint32_t *p;
145 const int l = BCH_ECC_WORDS(bch)-1;
146
147 while (len--) {
148 p = bch->mod8_tab + (l+1)*(((ecc[0] >> 24)^(*data++)) & 0xff);
149
150 for (i = 0; i < l; i++)
151 ecc[i] = ((ecc[i] << 8)|(ecc[i+1] >> 24))^(*p++);
152
153 ecc[l] = (ecc[l] << 8)^(*p);
154 }
155}
156
157/*
158 * convert ecc bytes to aligned, zero-padded 32-bit ecc words
159 */
160static void load_ecc8(struct bch_control *bch, uint32_t *dst,
161 const uint8_t *src)
162{
163 uint8_t pad[4] = {0, 0, 0, 0};
164 unsigned int i, nwords = BCH_ECC_WORDS(bch)-1;
165
166 for (i = 0; i < nwords; i++, src += 4)
167 dst[i] = (src[0] << 24)|(src[1] << 16)|(src[2] << 8)|src[3];
168
169 memcpy(pad, src, BCH_ECC_BYTES(bch)-4*nwords);
170 dst[nwords] = (pad[0] << 24)|(pad[1] << 16)|(pad[2] << 8)|pad[3];
171}
172
173/*
174 * convert 32-bit ecc words to ecc bytes
175 */
176static void store_ecc8(struct bch_control *bch, uint8_t *dst,
177 const uint32_t *src)
178{
179 uint8_t pad[4];
180 unsigned int i, nwords = BCH_ECC_WORDS(bch)-1;
181
182 for (i = 0; i < nwords; i++) {
183 *dst++ = (src[i] >> 24);
184 *dst++ = (src[i] >> 16) & 0xff;
185 *dst++ = (src[i] >> 8) & 0xff;
186 *dst++ = (src[i] >> 0) & 0xff;
187 }
188 pad[0] = (src[nwords] >> 24);
189 pad[1] = (src[nwords] >> 16) & 0xff;
190 pad[2] = (src[nwords] >> 8) & 0xff;
191 pad[3] = (src[nwords] >> 0) & 0xff;
192 memcpy(dst, pad, BCH_ECC_BYTES(bch)-4*nwords);
193}
194
195/**
196 * encode_bch - calculate BCH ecc parity of data
197 * @bch: BCH control structure
198 * @data: data to encode
199 * @len: data length in bytes
200 * @ecc: ecc parity data, must be initialized by caller
201 *
202 * The @ecc parity array is used both as input and output parameter, in order to
203 * allow incremental computations. It should be of the size indicated by member
204 * @ecc_bytes of @bch, and should be initialized to 0 before the first call.
205 *
206 * The exact number of computed ecc parity bits is given by member @ecc_bits of
207 * @bch; it may be less than m*t for large values of t.
208 */
209void encode_bch(struct bch_control *bch, const uint8_t *data,
210 unsigned int len, uint8_t *ecc)
211{
212 const unsigned int l = BCH_ECC_WORDS(bch)-1;
213 unsigned int i, mlen;
214 unsigned long m;
215 uint32_t w, r[l+1];
216 const uint32_t * const tab0 = bch->mod8_tab;
217 const uint32_t * const tab1 = tab0 + 256*(l+1);
218 const uint32_t * const tab2 = tab1 + 256*(l+1);
219 const uint32_t * const tab3 = tab2 + 256*(l+1);
220 const uint32_t *pdata, *p0, *p1, *p2, *p3;
221
222 if (ecc) {
223 /* load ecc parity bytes into internal 32-bit buffer */
224 load_ecc8(bch, bch->ecc_buf, ecc);
225 } else {
226 memset(bch->ecc_buf, 0, sizeof(r));
227 }
228
229 /* process first unaligned data bytes */
230 m = ((unsigned long)data) & 3;
231 if (m) {
232 mlen = (len < (4-m)) ? len : 4-m;
233 encode_bch_unaligned(bch, data, mlen, bch->ecc_buf);
234 data += mlen;
235 len -= mlen;
236 }
237
238 /* process 32-bit aligned data words */
239 pdata = (uint32_t *)data;
240 mlen = len/4;
241 data += 4*mlen;
242 len -= 4*mlen;
243 memcpy(r, bch->ecc_buf, sizeof(r));
244
245 /*
246 * split each 32-bit word into 4 polynomials of weight 8 as follows:
247 *
248 * 31 ...24 23 ...16 15 ... 8 7 ... 0
249 * xxxxxxxx yyyyyyyy zzzzzzzz tttttttt
250 * tttttttt mod g = r0 (precomputed)
251 * zzzzzzzz 00000000 mod g = r1 (precomputed)
252 * yyyyyyyy 00000000 00000000 mod g = r2 (precomputed)
253 * xxxxxxxx 00000000 00000000 00000000 mod g = r3 (precomputed)
254 * xxxxxxxx yyyyyyyy zzzzzzzz tttttttt mod g = r0^r1^r2^r3
255 */
256 while (mlen--) {
257 /* input data is read in big-endian format */
258 w = r[0]^cpu_to_be32(*pdata++);
259 p0 = tab0 + (l+1)*((w >> 0) & 0xff);
260 p1 = tab1 + (l+1)*((w >> 8) & 0xff);
261 p2 = tab2 + (l+1)*((w >> 16) & 0xff);
262 p3 = tab3 + (l+1)*((w >> 24) & 0xff);
263
264 for (i = 0; i < l; i++)
265 r[i] = r[i+1]^p0[i]^p1[i]^p2[i]^p3[i];
266
267 r[l] = p0[l]^p1[l]^p2[l]^p3[l];
268 }
269 memcpy(bch->ecc_buf, r, sizeof(r));
270
271 /* process last unaligned bytes */
272 if (len)
273 encode_bch_unaligned(bch, data, len, bch->ecc_buf);
274
275 /* store ecc parity bytes into original parity buffer */
276 if (ecc)
277 store_ecc8(bch, ecc, bch->ecc_buf);
278}
279EXPORT_SYMBOL_GPL(encode_bch);
280
281static inline int modulo(struct bch_control *bch, unsigned int v)
282{
283 const unsigned int n = GF_N(bch);
284 while (v >= n) {
285 v -= n;
286 v = (v & n) + (v >> GF_M(bch));
287 }
288 return v;
289}
290
291/*
292 * shorter and faster modulo function, only works when v < 2N.
293 */
294static inline int mod_s(struct bch_control *bch, unsigned int v)
295{
296 const unsigned int n = GF_N(bch);
297 return (v < n) ? v : v-n;
298}
299
300static inline int deg(unsigned int poly)
301{
302 /* polynomial degree is the most-significant bit index */
303 return fls(poly)-1;
304}
305
306static inline int parity(unsigned int x)
307{
308 /*
309 * public domain code snippet, lifted from
310 * http://www-graphics.stanford.edu/~seander/bithacks.html
311 */
312 x ^= x >> 1;
313 x ^= x >> 2;
314 x = (x & 0x11111111U) * 0x11111111U;
315 return (x >> 28) & 1;
316}
317
318/* Galois field basic operations: multiply, divide, inverse, etc. */
319
320static inline unsigned int gf_mul(struct bch_control *bch, unsigned int a,
321 unsigned int b)
322{
323 return (a && b) ? bch->a_pow_tab[mod_s(bch, bch->a_log_tab[a]+
324 bch->a_log_tab[b])] : 0;
325}
326
327static inline unsigned int gf_sqr(struct bch_control *bch, unsigned int a)
328{
329 return a ? bch->a_pow_tab[mod_s(bch, 2*bch->a_log_tab[a])] : 0;
330}
331
332static inline unsigned int gf_div(struct bch_control *bch, unsigned int a,
333 unsigned int b)
334{
335 return a ? bch->a_pow_tab[mod_s(bch, bch->a_log_tab[a]+
336 GF_N(bch)-bch->a_log_tab[b])] : 0;
337}
338
339static inline unsigned int gf_inv(struct bch_control *bch, unsigned int a)
340{
341 return bch->a_pow_tab[GF_N(bch)-bch->a_log_tab[a]];
342}
343
344static inline unsigned int a_pow(struct bch_control *bch, int i)
345{
346 return bch->a_pow_tab[modulo(bch, i)];
347}
348
349static inline int a_log(struct bch_control *bch, unsigned int x)
350{
351 return bch->a_log_tab[x];
352}
353
354static inline int a_ilog(struct bch_control *bch, unsigned int x)
355{
356 return mod_s(bch, GF_N(bch)-bch->a_log_tab[x]);
357}
358
359/*
360 * compute 2t syndromes of ecc polynomial, i.e. ecc(a^j) for j=1..2t
361 */
362static void compute_syndromes(struct bch_control *bch, uint32_t *ecc,
363 unsigned int *syn)
364{
365 int i, j, s;
366 unsigned int m;
367 uint32_t poly;
368 const int t = GF_T(bch);
369
370 s = bch->ecc_bits;
371
372 /* make sure extra bits in last ecc word are cleared */
373 m = ((unsigned int)s) & 31;
374 if (m)
375 ecc[s/32] &= ~((1u << (32-m))-1);
376 memset(syn, 0, 2*t*sizeof(*syn));
377
378 /* compute v(a^j) for j=1 .. 2t-1 */
379 do {
380 poly = *ecc++;
381 s -= 32;
382 while (poly) {
383 i = deg(poly);
384 for (j = 0; j < 2*t; j += 2)
385 syn[j] ^= a_pow(bch, (j+1)*(i+s));
386
387 poly ^= (1 << i);
388 }
389 } while (s > 0);
390
391 /* v(a^(2j)) = v(a^j)^2 */
392 for (j = 0; j < t; j++)
393 syn[2*j+1] = gf_sqr(bch, syn[j]);
394}
395
396static void gf_poly_copy(struct gf_poly *dst, struct gf_poly *src)
397{
398 memcpy(dst, src, GF_POLY_SZ(src->deg));
399}
400
401static int compute_error_locator_polynomial(struct bch_control *bch,
402 const unsigned int *syn)
403{
404 const unsigned int t = GF_T(bch);
405 const unsigned int n = GF_N(bch);
406 unsigned int i, j, tmp, l, pd = 1, d = syn[0];
407 struct gf_poly *elp = bch->elp;
408 struct gf_poly *pelp = bch->poly_2t[0];
409 struct gf_poly *elp_copy = bch->poly_2t[1];
410 int k, pp = -1;
411
412 memset(pelp, 0, GF_POLY_SZ(2*t));
413 memset(elp, 0, GF_POLY_SZ(2*t));
414
415 pelp->deg = 0;
416 pelp->c[0] = 1;
417 elp->deg = 0;
418 elp->c[0] = 1;
419
420 /* use simplified binary Berlekamp-Massey algorithm */
421 for (i = 0; (i < t) && (elp->deg <= t); i++) {
422 if (d) {
423 k = 2*i-pp;
424 gf_poly_copy(elp_copy, elp);
425 /* e[i+1](X) = e[i](X)+di*dp^-1*X^2(i-p)*e[p](X) */
426 tmp = a_log(bch, d)+n-a_log(bch, pd);
427 for (j = 0; j <= pelp->deg; j++) {
428 if (pelp->c[j]) {
429 l = a_log(bch, pelp->c[j]);
430 elp->c[j+k] ^= a_pow(bch, tmp+l);
431 }
432 }
433 /* compute l[i+1] = max(l[i]->c[l[p]+2*(i-p]) */
434 tmp = pelp->deg+k;
435 if (tmp > elp->deg) {
436 elp->deg = tmp;
437 gf_poly_copy(pelp, elp_copy);
438 pd = d;
439 pp = 2*i;
440 }
441 }
442 /* di+1 = S(2i+3)+elp[i+1].1*S(2i+2)+...+elp[i+1].lS(2i+3-l) */
443 if (i < t-1) {
444 d = syn[2*i+2];
445 for (j = 1; j <= elp->deg; j++)
446 d ^= gf_mul(bch, elp->c[j], syn[2*i+2-j]);
447 }
448 }
449 dbg("elp=%s\n", gf_poly_str(elp));
450 return (elp->deg > t) ? -1 : (int)elp->deg;
451}
452
453/*
454 * solve a m x m linear system in GF(2) with an expected number of solutions,
455 * and return the number of found solutions
456 */
457static int solve_linear_system(struct bch_control *bch, unsigned int *rows,
458 unsigned int *sol, int nsol)
459{
460 const int m = GF_M(bch);
461 unsigned int tmp, mask;
462 int rem, c, r, p, k, param[m];
463
464 k = 0;
465 mask = 1 << m;
466
467 /* Gaussian elimination */
468 for (c = 0; c < m; c++) {
469 rem = 0;
470 p = c-k;
471 /* find suitable row for elimination */
472 for (r = p; r < m; r++) {
473 if (rows[r] & mask) {
474 if (r != p) {
475 tmp = rows[r];
476 rows[r] = rows[p];
477 rows[p] = tmp;
478 }
479 rem = r+1;
480 break;
481 }
482 }
483 if (rem) {
484 /* perform elimination on remaining rows */
485 tmp = rows[p];
486 for (r = rem; r < m; r++) {
487 if (rows[r] & mask)
488 rows[r] ^= tmp;
489 }
490 } else {
491 /* elimination not needed, store defective row index */
492 param[k++] = c;
493 }
494 mask >>= 1;
495 }
496 /* rewrite system, inserting fake parameter rows */
497 if (k > 0) {
498 p = k;
499 for (r = m-1; r >= 0; r--) {
500 if ((r > m-1-k) && rows[r])
501 /* system has no solution */
502 return 0;
503
504 rows[r] = (p && (r == param[p-1])) ?
505 p--, 1u << (m-r) : rows[r-p];
506 }
507 }
508
509 if (nsol != (1 << k))
510 /* unexpected number of solutions */
511 return 0;
512
513 for (p = 0; p < nsol; p++) {
514 /* set parameters for p-th solution */
515 for (c = 0; c < k; c++)
516 rows[param[c]] = (rows[param[c]] & ~1)|((p >> c) & 1);
517
518 /* compute unique solution */
519 tmp = 0;
520 for (r = m-1; r >= 0; r--) {
521 mask = rows[r] & (tmp|1);
522 tmp |= parity(mask) << (m-r);
523 }
524 sol[p] = tmp >> 1;
525 }
526 return nsol;
527}
528
529/*
530 * this function builds and solves a linear system for finding roots of a degree
531 * 4 affine monic polynomial X^4+aX^2+bX+c over GF(2^m).
532 */
533static int find_affine4_roots(struct bch_control *bch, unsigned int a,
534 unsigned int b, unsigned int c,
535 unsigned int *roots)
536{
537 int i, j, k;
538 const int m = GF_M(bch);
539 unsigned int mask = 0xff, t, rows[16] = {0,};
540
541 j = a_log(bch, b);
542 k = a_log(bch, a);
543 rows[0] = c;
544
545 /* buid linear system to solve X^4+aX^2+bX+c = 0 */
546 for (i = 0; i < m; i++) {
547 rows[i+1] = bch->a_pow_tab[4*i]^
548 (a ? bch->a_pow_tab[mod_s(bch, k)] : 0)^
549 (b ? bch->a_pow_tab[mod_s(bch, j)] : 0);
550 j++;
551 k += 2;
552 }
553 /*
554 * transpose 16x16 matrix before passing it to linear solver
555 * warning: this code assumes m < 16
556 */
557 for (j = 8; j != 0; j >>= 1, mask ^= (mask << j)) {
558 for (k = 0; k < 16; k = (k+j+1) & ~j) {
559 t = ((rows[k] >> j)^rows[k+j]) & mask;
560 rows[k] ^= (t << j);
561 rows[k+j] ^= t;
562 }
563 }
564 return solve_linear_system(bch, rows, roots, 4);
565}
566
567/*
568 * compute root r of a degree 1 polynomial over GF(2^m) (returned as log(1/r))
569 */
570static int find_poly_deg1_roots(struct bch_control *bch, struct gf_poly *poly,
571 unsigned int *roots)
572{
573 int n = 0;
574
575 if (poly->c[0])
576 /* poly[X] = bX+c with c!=0, root=c/b */
577 roots[n++] = mod_s(bch, GF_N(bch)-bch->a_log_tab[poly->c[0]]+
578 bch->a_log_tab[poly->c[1]]);
579 return n;
580}
581
582/*
583 * compute roots of a degree 2 polynomial over GF(2^m)
584 */
585static int find_poly_deg2_roots(struct bch_control *bch, struct gf_poly *poly,
586 unsigned int *roots)
587{
588 int n = 0, i, l0, l1, l2;
589 unsigned int u, v, r;
590
591 if (poly->c[0] && poly->c[1]) {
592
593 l0 = bch->a_log_tab[poly->c[0]];
594 l1 = bch->a_log_tab[poly->c[1]];
595 l2 = bch->a_log_tab[poly->c[2]];
596
597 /* using z=a/bX, transform aX^2+bX+c into z^2+z+u (u=ac/b^2) */
598 u = a_pow(bch, l0+l2+2*(GF_N(bch)-l1));
599 /*
600 * let u = sum(li.a^i) i=0..m-1; then compute r = sum(li.xi):
601 * r^2+r = sum(li.(xi^2+xi)) = sum(li.(a^i+Tr(a^i).a^k)) =
602 * u + sum(li.Tr(a^i).a^k) = u+a^k.Tr(sum(li.a^i)) = u+a^k.Tr(u)
603 * i.e. r and r+1 are roots iff Tr(u)=0
604 */
605 r = 0;
606 v = u;
607 while (v) {
608 i = deg(v);
609 r ^= bch->xi_tab[i];
610 v ^= (1 << i);
611 }
612 /* verify root */
613 if ((gf_sqr(bch, r)^r) == u) {
614 /* reverse z=a/bX transformation and compute log(1/r) */
615 roots[n++] = modulo(bch, 2*GF_N(bch)-l1-
616 bch->a_log_tab[r]+l2);
617 roots[n++] = modulo(bch, 2*GF_N(bch)-l1-
618 bch->a_log_tab[r^1]+l2);
619 }
620 }
621 return n;
622}
623
624/*
625 * compute roots of a degree 3 polynomial over GF(2^m)
626 */
627static int find_poly_deg3_roots(struct bch_control *bch, struct gf_poly *poly,
628 unsigned int *roots)
629{
630 int i, n = 0;
631 unsigned int a, b, c, a2, b2, c2, e3, tmp[4];
632
633 if (poly->c[0]) {
634 /* transform polynomial into monic X^3 + a2X^2 + b2X + c2 */
635 e3 = poly->c[3];
636 c2 = gf_div(bch, poly->c[0], e3);
637 b2 = gf_div(bch, poly->c[1], e3);
638 a2 = gf_div(bch, poly->c[2], e3);
639
640 /* (X+a2)(X^3+a2X^2+b2X+c2) = X^4+aX^2+bX+c (affine) */
641 c = gf_mul(bch, a2, c2); /* c = a2c2 */
642 b = gf_mul(bch, a2, b2)^c2; /* b = a2b2 + c2 */
643 a = gf_sqr(bch, a2)^b2; /* a = a2^2 + b2 */
644
645 /* find the 4 roots of this affine polynomial */
646 if (find_affine4_roots(bch, a, b, c, tmp) == 4) {
647 /* remove a2 from final list of roots */
648 for (i = 0; i < 4; i++) {
649 if (tmp[i] != a2)
650 roots[n++] = a_ilog(bch, tmp[i]);
651 }
652 }
653 }
654 return n;
655}
656
657/*
658 * compute roots of a degree 4 polynomial over GF(2^m)
659 */
660static int find_poly_deg4_roots(struct bch_control *bch, struct gf_poly *poly,
661 unsigned int *roots)
662{
663 int i, l, n = 0;
664 unsigned int a, b, c, d, e = 0, f, a2, b2, c2, e4;
665
666 if (poly->c[0] == 0)
667 return 0;
668
669 /* transform polynomial into monic X^4 + aX^3 + bX^2 + cX + d */
670 e4 = poly->c[4];
671 d = gf_div(bch, poly->c[0], e4);
672 c = gf_div(bch, poly->c[1], e4);
673 b = gf_div(bch, poly->c[2], e4);
674 a = gf_div(bch, poly->c[3], e4);
675
676 /* use Y=1/X transformation to get an affine polynomial */
677 if (a) {
678 /* first, eliminate cX by using z=X+e with ae^2+c=0 */
679 if (c) {
680 /* compute e such that e^2 = c/a */
681 f = gf_div(bch, c, a);
682 l = a_log(bch, f);
683 l += (l & 1) ? GF_N(bch) : 0;
684 e = a_pow(bch, l/2);
685 /*
686 * use transformation z=X+e:
687 * z^4+e^4 + a(z^3+ez^2+e^2z+e^3) + b(z^2+e^2) +cz+ce+d
688 * z^4 + az^3 + (ae+b)z^2 + (ae^2+c)z+e^4+be^2+ae^3+ce+d
689 * z^4 + az^3 + (ae+b)z^2 + e^4+be^2+d
690 * z^4 + az^3 + b'z^2 + d'
691 */
692 d = a_pow(bch, 2*l)^gf_mul(bch, b, f)^d;
693 b = gf_mul(bch, a, e)^b;
694 }
695 /* now, use Y=1/X to get Y^4 + b/dY^2 + a/dY + 1/d */
696 if (d == 0)
697 /* assume all roots have multiplicity 1 */
698 return 0;
699
700 c2 = gf_inv(bch, d);
701 b2 = gf_div(bch, a, d);
702 a2 = gf_div(bch, b, d);
703 } else {
704 /* polynomial is already affine */
705 c2 = d;
706 b2 = c;
707 a2 = b;
708 }
709 /* find the 4 roots of this affine polynomial */
710 if (find_affine4_roots(bch, a2, b2, c2, roots) == 4) {
711 for (i = 0; i < 4; i++) {
712 /* post-process roots (reverse transformations) */
713 f = a ? gf_inv(bch, roots[i]) : roots[i];
714 roots[i] = a_ilog(bch, f^e);
715 }
716 n = 4;
717 }
718 return n;
719}
720
721/*
722 * build monic, log-based representation of a polynomial
723 */
724static void gf_poly_logrep(struct bch_control *bch,
725 const struct gf_poly *a, int *rep)
726{
727 int i, d = a->deg, l = GF_N(bch)-a_log(bch, a->c[a->deg]);
728
729 /* represent 0 values with -1; warning, rep[d] is not set to 1 */
730 for (i = 0; i < d; i++)
731 rep[i] = a->c[i] ? mod_s(bch, a_log(bch, a->c[i])+l) : -1;
732}
733
734/*
735 * compute polynomial Euclidean division remainder in GF(2^m)[X]
736 */
737static void gf_poly_mod(struct bch_control *bch, struct gf_poly *a,
738 const struct gf_poly *b, int *rep)
739{
740 int la, p, m;
741 unsigned int i, j, *c = a->c;
742 const unsigned int d = b->deg;
743
744 if (a->deg < d)
745 return;
746
747 /* reuse or compute log representation of denominator */
748 if (!rep) {
749 rep = bch->cache;
750 gf_poly_logrep(bch, b, rep);
751 }
752
753 for (j = a->deg; j >= d; j--) {
754 if (c[j]) {
755 la = a_log(bch, c[j]);
756 p = j-d;
757 for (i = 0; i < d; i++, p++) {
758 m = rep[i];
759 if (m >= 0)
760 c[p] ^= bch->a_pow_tab[mod_s(bch,
761 m+la)];
762 }
763 }
764 }
765 a->deg = d-1;
766 while (!c[a->deg] && a->deg)
767 a->deg--;
768}
769
770/*
771 * compute polynomial Euclidean division quotient in GF(2^m)[X]
772 */
773static void gf_poly_div(struct bch_control *bch, struct gf_poly *a,
774 const struct gf_poly *b, struct gf_poly *q)
775{
776 if (a->deg >= b->deg) {
777 q->deg = a->deg-b->deg;
778 /* compute a mod b (modifies a) */
779 gf_poly_mod(bch, a, b, NULL);
780 /* quotient is stored in upper part of polynomial a */
781 memcpy(q->c, &a->c[b->deg], (1+q->deg)*sizeof(unsigned int));
782 } else {
783 q->deg = 0;
784 q->c[0] = 0;
785 }
786}
787
788/*
789 * compute polynomial GCD (Greatest Common Divisor) in GF(2^m)[X]
790 */
791static struct gf_poly *gf_poly_gcd(struct bch_control *bch, struct gf_poly *a,
792 struct gf_poly *b)
793{
794 struct gf_poly *tmp;
795
796 dbg("gcd(%s,%s)=", gf_poly_str(a), gf_poly_str(b));
797
798 if (a->deg < b->deg) {
799 tmp = b;
800 b = a;
801 a = tmp;
802 }
803
804 while (b->deg > 0) {
805 gf_poly_mod(bch, a, b, NULL);
806 tmp = b;
807 b = a;
808 a = tmp;
809 }
810
811 dbg("%s\n", gf_poly_str(a));
812
813 return a;
814}
815
816/*
817 * Given a polynomial f and an integer k, compute Tr(a^kX) mod f
818 * This is used in Berlekamp Trace algorithm for splitting polynomials
819 */
820static void compute_trace_bk_mod(struct bch_control *bch, int k,
821 const struct gf_poly *f, struct gf_poly *z,
822 struct gf_poly *out)
823{
824 const int m = GF_M(bch);
825 int i, j;
826
827 /* z contains z^2j mod f */
828 z->deg = 1;
829 z->c[0] = 0;
830 z->c[1] = bch->a_pow_tab[k];
831
832 out->deg = 0;
833 memset(out, 0, GF_POLY_SZ(f->deg));
834
835 /* compute f log representation only once */
836 gf_poly_logrep(bch, f, bch->cache);
837
838 for (i = 0; i < m; i++) {
839 /* add a^(k*2^i)(z^(2^i) mod f) and compute (z^(2^i) mod f)^2 */
840 for (j = z->deg; j >= 0; j--) {
841 out->c[j] ^= z->c[j];
842 z->c[2*j] = gf_sqr(bch, z->c[j]);
843 z->c[2*j+1] = 0;
844 }
845 if (z->deg > out->deg)
846 out->deg = z->deg;
847
848 if (i < m-1) {
849 z->deg *= 2;
850 /* z^(2(i+1)) mod f = (z^(2^i) mod f)^2 mod f */
851 gf_poly_mod(bch, z, f, bch->cache);
852 }
853 }
854 while (!out->c[out->deg] && out->deg)
855 out->deg--;
856
857 dbg("Tr(a^%d.X) mod f = %s\n", k, gf_poly_str(out));
858}
859
860/*
861 * factor a polynomial using Berlekamp Trace algorithm (BTA)
862 */
863static void factor_polynomial(struct bch_control *bch, int k, struct gf_poly *f,
864 struct gf_poly **g, struct gf_poly **h)
865{
866 struct gf_poly *f2 = bch->poly_2t[0];
867 struct gf_poly *q = bch->poly_2t[1];
868 struct gf_poly *tk = bch->poly_2t[2];
869 struct gf_poly *z = bch->poly_2t[3];
870 struct gf_poly *gcd;
871
872 dbg("factoring %s...\n", gf_poly_str(f));
873
874 *g = f;
875 *h = NULL;
876
877 /* tk = Tr(a^k.X) mod f */
878 compute_trace_bk_mod(bch, k, f, z, tk);
879
880 if (tk->deg > 0) {
881 /* compute g = gcd(f, tk) (destructive operation) */
882 gf_poly_copy(f2, f);
883 gcd = gf_poly_gcd(bch, f2, tk);
884 if (gcd->deg < f->deg) {
885 /* compute h=f/gcd(f,tk); this will modify f and q */
886 gf_poly_div(bch, f, gcd, q);
887 /* store g and h in-place (clobbering f) */
888 *h = &((struct gf_poly_deg1 *)f)[gcd->deg].poly;
889 gf_poly_copy(*g, gcd);
890 gf_poly_copy(*h, q);
891 }
892 }
893}
894
895/*
896 * find roots of a polynomial, using BTZ algorithm; see the beginning of this
897 * file for details
898 */
899static int find_poly_roots(struct bch_control *bch, unsigned int k,
900 struct gf_poly *poly, unsigned int *roots)
901{
902 int cnt;
903 struct gf_poly *f1, *f2;
904
905 switch (poly->deg) {
906 /* handle low degree polynomials with ad hoc techniques */
907 case 1:
908 cnt = find_poly_deg1_roots(bch, poly, roots);
909 break;
910 case 2:
911 cnt = find_poly_deg2_roots(bch, poly, roots);
912 break;
913 case 3:
914 cnt = find_poly_deg3_roots(bch, poly, roots);
915 break;
916 case 4:
917 cnt = find_poly_deg4_roots(bch, poly, roots);
918 break;
919 default:
920 /* factor polynomial using Berlekamp Trace Algorithm (BTA) */
921 cnt = 0;
922 if (poly->deg && (k <= GF_M(bch))) {
923 factor_polynomial(bch, k, poly, &f1, &f2);
924 if (f1)
925 cnt += find_poly_roots(bch, k+1, f1, roots);
926 if (f2)
927 cnt += find_poly_roots(bch, k+1, f2, roots+cnt);
928 }
929 break;
930 }
931 return cnt;
932}
933
934#if defined(USE_CHIEN_SEARCH)
935/*
936 * exhaustive root search (Chien) implementation - not used, included only for
937 * reference/comparison tests
938 */
939static int chien_search(struct bch_control *bch, unsigned int len,
940 struct gf_poly *p, unsigned int *roots)
941{
942 int m;
943 unsigned int i, j, syn, syn0, count = 0;
944 const unsigned int k = 8*len+bch->ecc_bits;
945
946 /* use a log-based representation of polynomial */
947 gf_poly_logrep(bch, p, bch->cache);
948 bch->cache[p->deg] = 0;
949 syn0 = gf_div(bch, p->c[0], p->c[p->deg]);
950
951 for (i = GF_N(bch)-k+1; i <= GF_N(bch); i++) {
952 /* compute elp(a^i) */
953 for (j = 1, syn = syn0; j <= p->deg; j++) {
954 m = bch->cache[j];
955 if (m >= 0)
956 syn ^= a_pow(bch, m+j*i);
957 }
958 if (syn == 0) {
959 roots[count++] = GF_N(bch)-i;
960 if (count == p->deg)
961 break;
962 }
963 }
964 return (count == p->deg) ? count : 0;
965}
966#define find_poly_roots(_p, _k, _elp, _loc) chien_search(_p, len, _elp, _loc)
967#endif /* USE_CHIEN_SEARCH */
968
969/**
970 * decode_bch - decode received codeword and find bit error locations
971 * @bch: BCH control structure
972 * @data: received data, ignored if @calc_ecc is provided
973 * @len: data length in bytes, must always be provided
974 * @recv_ecc: received ecc, if NULL then assume it was XORed in @calc_ecc
975 * @calc_ecc: calculated ecc, if NULL then calc_ecc is computed from @data
976 * @syn: hw computed syndrome data (if NULL, syndrome is calculated)
977 * @errloc: output array of error locations
978 *
979 * Returns:
980 * The number of errors found, or -EBADMSG if decoding failed, or -EINVAL if
981 * invalid parameters were provided
982 *
983 * Depending on the available hw BCH support and the need to compute @calc_ecc
984 * separately (using encode_bch()), this function should be called with one of
985 * the following parameter configurations -
986 *
987 * by providing @data and @recv_ecc only:
988 * decode_bch(@bch, @data, @len, @recv_ecc, NULL, NULL, @errloc)
989 *
990 * by providing @recv_ecc and @calc_ecc:
991 * decode_bch(@bch, NULL, @len, @recv_ecc, @calc_ecc, NULL, @errloc)
992 *
993 * by providing ecc = recv_ecc XOR calc_ecc:
994 * decode_bch(@bch, NULL, @len, NULL, ecc, NULL, @errloc)
995 *
996 * by providing syndrome results @syn:
997 * decode_bch(@bch, NULL, @len, NULL, NULL, @syn, @errloc)
998 *
999 * Once decode_bch() has successfully returned with a positive value, error
1000 * locations returned in array @errloc should be interpreted as follows -
1001 *
1002 * if (errloc[n] >= 8*len), then n-th error is located in ecc (no need for
1003 * data correction)
1004 *
1005 * if (errloc[n] < 8*len), then n-th error is located in data and can be
1006 * corrected with statement data[errloc[n]/8] ^= 1 << (errloc[n] % 8);
1007 *
1008 * Note that this function does not perform any data correction by itself, it
1009 * merely indicates error locations.
1010 */
1011int decode_bch(struct bch_control *bch, const uint8_t *data, unsigned int len,
1012 const uint8_t *recv_ecc, const uint8_t *calc_ecc,
1013 const unsigned int *syn, unsigned int *errloc)
1014{
1015 const unsigned int ecc_words = BCH_ECC_WORDS(bch);
1016 unsigned int nbits;
1017 int i, err, nroots;
1018 uint32_t sum;
1019
1020 /* sanity check: make sure data length can be handled */
1021 if (8*len > (bch->n-bch->ecc_bits))
1022 return -EINVAL;
1023
1024 /* if caller does not provide syndromes, compute them */
1025 if (!syn) {
1026 if (!calc_ecc) {
1027 /* compute received data ecc into an internal buffer */
1028 if (!data || !recv_ecc)
1029 return -EINVAL;
1030 encode_bch(bch, data, len, NULL);
1031 } else {
1032 /* load provided calculated ecc */
1033 load_ecc8(bch, bch->ecc_buf, calc_ecc);
1034 }
1035 /* load received ecc or assume it was XORed in calc_ecc */
1036 if (recv_ecc) {
1037 load_ecc8(bch, bch->ecc_buf2, recv_ecc);
1038 /* XOR received and calculated ecc */
1039 for (i = 0, sum = 0; i < (int)ecc_words; i++) {
1040 bch->ecc_buf[i] ^= bch->ecc_buf2[i];
1041 sum |= bch->ecc_buf[i];
1042 }
1043 if (!sum)
1044 /* no error found */
1045 return 0;
1046 }
1047 compute_syndromes(bch, bch->ecc_buf, bch->syn);
1048 syn = bch->syn;
1049 }
1050
1051 err = compute_error_locator_polynomial(bch, syn);
1052 if (err > 0) {
1053 nroots = find_poly_roots(bch, 1, bch->elp, errloc);
1054 if (err != nroots)
1055 err = -1;
1056 }
1057 if (err > 0) {
1058 /* post-process raw error locations for easier correction */
1059 nbits = (len*8)+bch->ecc_bits;
1060 for (i = 0; i < err; i++) {
1061 if (errloc[i] >= nbits) {
1062 err = -2;
1063 break;
1064 }
1065 errloc[i] = nbits-1-errloc[i];
1066 errloc[i] = (errloc[i] & ~7)|(7-(errloc[i] & 7));
1067 }
1068 }
1069 return err;
1070}
1071EXPORT_SYMBOL_GPL(decode_bch);
1072
1073/*
1074 * generate Galois field lookup tables
1075 */
1076static int build_gf_tables(struct bch_control *bch, unsigned int poly)
1077{
1078 unsigned int i, x = 1;
1079 const unsigned int k = 1 << deg(poly);
1080
1081 /* primitive polynomial must be of degree m */
1082 if (k != (1u << GF_M(bch)))
1083 return -1;
1084
1085 for (i = 0; i < GF_N(bch); i++) {
1086 bch->a_pow_tab[i] = x;
1087 bch->a_log_tab[x] = i;
1088 if (i && (x == 1))
1089 /* polynomial is not primitive (a^i=1 with 0<i<2^m-1) */
1090 return -1;
1091 x <<= 1;
1092 if (x & k)
1093 x ^= poly;
1094 }
1095 bch->a_pow_tab[GF_N(bch)] = 1;
1096 bch->a_log_tab[0] = 0;
1097
1098 return 0;
1099}
1100
1101/*
1102 * compute generator polynomial remainder tables for fast encoding
1103 */
1104static void build_mod8_tables(struct bch_control *bch, const uint32_t *g)
1105{
1106 int i, j, b, d;
1107 uint32_t data, hi, lo, *tab;
1108 const int l = BCH_ECC_WORDS(bch);
1109 const int plen = DIV_ROUND_UP(bch->ecc_bits+1, 32);
1110 const int ecclen = DIV_ROUND_UP(bch->ecc_bits, 32);
1111
1112 memset(bch->mod8_tab, 0, 4*256*l*sizeof(*bch->mod8_tab));
1113
1114 for (i = 0; i < 256; i++) {
1115 /* p(X)=i is a small polynomial of weight <= 8 */
1116 for (b = 0; b < 4; b++) {
1117 /* we want to compute (p(X).X^(8*b+deg(g))) mod g(X) */
1118 tab = bch->mod8_tab + (b*256+i)*l;
1119 data = i << (8*b);
1120 while (data) {
1121 d = deg(data);
1122 /* subtract X^d.g(X) from p(X).X^(8*b+deg(g)) */
1123 data ^= g[0] >> (31-d);
1124 for (j = 0; j < ecclen; j++) {
1125 hi = (d < 31) ? g[j] << (d+1) : 0;
1126 lo = (j+1 < plen) ?
1127 g[j+1] >> (31-d) : 0;
1128 tab[j] ^= hi|lo;
1129 }
1130 }
1131 }
1132 }
1133}
1134
1135/*
1136 * build a base for factoring degree 2 polynomials
1137 */
1138static int build_deg2_base(struct bch_control *bch)
1139{
1140 const int m = GF_M(bch);
1141 int i, j, r;
1142 unsigned int sum, x, y, remaining, ak = 0, xi[m];
1143
1144 /* find k s.t. Tr(a^k) = 1 and 0 <= k < m */
1145 for (i = 0; i < m; i++) {
1146 for (j = 0, sum = 0; j < m; j++)
1147 sum ^= a_pow(bch, i*(1 << j));
1148
1149 if (sum) {
1150 ak = bch->a_pow_tab[i];
1151 break;
1152 }
1153 }
1154 /* find xi, i=0..m-1 such that xi^2+xi = a^i+Tr(a^i).a^k */
1155 remaining = m;
1156 memset(xi, 0, sizeof(xi));
1157
1158 for (x = 0; (x <= GF_N(bch)) && remaining; x++) {
1159 y = gf_sqr(bch, x)^x;
1160 for (i = 0; i < 2; i++) {
1161 r = a_log(bch, y);
1162 if (y && (r < m) && !xi[r]) {
1163 bch->xi_tab[r] = x;
1164 xi[r] = 1;
1165 remaining--;
1166 dbg("x%d = %x\n", r, x);
1167 break;
1168 }
1169 y ^= ak;
1170 }
1171 }
1172 /* should not happen but check anyway */
1173 return remaining ? -1 : 0;
1174}
1175
1176static void *bch_alloc(size_t size, int *err)
1177{
1178 void *ptr;
1179
1180 ptr = kmalloc(size, GFP_KERNEL);
1181 if (ptr == NULL)
1182 *err = 1;
1183 return ptr;
1184}
1185
1186/*
1187 * compute generator polynomial for given (m,t) parameters.
1188 */
1189static uint32_t *compute_generator_polynomial(struct bch_control *bch)
1190{
1191 const unsigned int m = GF_M(bch);
1192 const unsigned int t = GF_T(bch);
1193 int n, err = 0;
1194 unsigned int i, j, nbits, r, word, *roots;
1195 struct gf_poly *g;
1196 uint32_t *genpoly;
1197
1198 g = bch_alloc(GF_POLY_SZ(m*t), &err);
1199 roots = bch_alloc((bch->n+1)*sizeof(*roots), &err);
1200 genpoly = bch_alloc(DIV_ROUND_UP(m*t+1, 32)*sizeof(*genpoly), &err);
1201
1202 if (err) {
1203 kfree(genpoly);
1204 genpoly = NULL;
1205 goto finish;
1206 }
1207
1208 /* enumerate all roots of g(X) */
1209 memset(roots , 0, (bch->n+1)*sizeof(*roots));
1210 for (i = 0; i < t; i++) {
1211 for (j = 0, r = 2*i+1; j < m; j++) {
1212 roots[r] = 1;
1213 r = mod_s(bch, 2*r);
1214 }
1215 }
1216 /* build generator polynomial g(X) */
1217 g->deg = 0;
1218 g->c[0] = 1;
1219 for (i = 0; i < GF_N(bch); i++) {
1220 if (roots[i]) {
1221 /* multiply g(X) by (X+root) */
1222 r = bch->a_pow_tab[i];
1223 g->c[g->deg+1] = 1;
1224 for (j = g->deg; j > 0; j--)
1225 g->c[j] = gf_mul(bch, g->c[j], r)^g->c[j-1];
1226
1227 g->c[0] = gf_mul(bch, g->c[0], r);
1228 g->deg++;
1229 }
1230 }
1231 /* store left-justified binary representation of g(X) */
1232 n = g->deg+1;
1233 i = 0;
1234
1235 while (n > 0) {
1236 nbits = (n > 32) ? 32 : n;
1237 for (j = 0, word = 0; j < nbits; j++) {
1238 if (g->c[n-1-j])
1239 word |= 1u << (31-j);
1240 }
1241 genpoly[i++] = word;
1242 n -= nbits;
1243 }
1244 bch->ecc_bits = g->deg;
1245
1246finish:
1247 kfree(g);
1248 kfree(roots);
1249
1250 return genpoly;
1251}
1252
1253/**
1254 * init_bch - initialize a BCH encoder/decoder
1255 * @m: Galois field order, should be in the range 5-15
1256 * @t: maximum error correction capability, in bits
1257 * @prim_poly: user-provided primitive polynomial (or 0 to use default)
1258 *
1259 * Returns:
1260 * a newly allocated BCH control structure if successful, NULL otherwise
1261 *
1262 * This initialization can take some time, as lookup tables are built for fast
1263 * encoding/decoding; make sure not to call this function from a time critical
1264 * path. Usually, init_bch() should be called on module/driver init and
1265 * free_bch() should be called to release memory on exit.
1266 *
1267 * You may provide your own primitive polynomial of degree @m in argument
1268 * @prim_poly, or let init_bch() use its default polynomial.
1269 *
1270 * Once init_bch() has successfully returned a pointer to a newly allocated
1271 * BCH control structure, ecc length in bytes is given by member @ecc_bytes of
1272 * the structure.
1273 */
1274struct bch_control *init_bch(int m, int t, unsigned int prim_poly)
1275{
1276 int err = 0;
1277 unsigned int i, words;
1278 uint32_t *genpoly;
1279 struct bch_control *bch = NULL;
1280
1281 const int min_m = 5;
1282 const int max_m = 15;
1283
1284 /* default primitive polynomials */
1285 static const unsigned int prim_poly_tab[] = {
1286 0x25, 0x43, 0x83, 0x11d, 0x211, 0x409, 0x805, 0x1053, 0x201b,
1287 0x402b, 0x8003,
1288 };
1289
1290#if defined(CONFIG_BCH_CONST_PARAMS)
1291 if ((m != (CONFIG_BCH_CONST_M)) || (t != (CONFIG_BCH_CONST_T))) {
1292 printk(KERN_ERR "bch encoder/decoder was configured to support "
1293 "parameters m=%d, t=%d only!\n",
1294 CONFIG_BCH_CONST_M, CONFIG_BCH_CONST_T);
1295 goto fail;
1296 }
1297#endif
1298 if ((m < min_m) || (m > max_m))
1299 /*
1300 * values of m greater than 15 are not currently supported;
1301 * supporting m > 15 would require changing table base type
1302 * (uint16_t) and a small patch in matrix transposition
1303 */
1304 goto fail;
1305
1306 /* sanity checks */
1307 if ((t < 1) || (m*t >= ((1 << m)-1)))
1308 /* invalid t value */
1309 goto fail;
1310
1311 /* select a primitive polynomial for generating GF(2^m) */
1312 if (prim_poly == 0)
1313 prim_poly = prim_poly_tab[m-min_m];
1314
1315 bch = kzalloc(sizeof(*bch), GFP_KERNEL);
1316 if (bch == NULL)
1317 goto fail;
1318
1319 bch->m = m;
1320 bch->t = t;
1321 bch->n = (1 << m)-1;
1322 words = DIV_ROUND_UP(m*t, 32);
1323 bch->ecc_bytes = DIV_ROUND_UP(m*t, 8);
1324 bch->a_pow_tab = bch_alloc((1+bch->n)*sizeof(*bch->a_pow_tab), &err);
1325 bch->a_log_tab = bch_alloc((1+bch->n)*sizeof(*bch->a_log_tab), &err);
1326 bch->mod8_tab = bch_alloc(words*1024*sizeof(*bch->mod8_tab), &err);
1327 bch->ecc_buf = bch_alloc(words*sizeof(*bch->ecc_buf), &err);
1328 bch->ecc_buf2 = bch_alloc(words*sizeof(*bch->ecc_buf2), &err);
1329 bch->xi_tab = bch_alloc(m*sizeof(*bch->xi_tab), &err);
1330 bch->syn = bch_alloc(2*t*sizeof(*bch->syn), &err);
1331 bch->cache = bch_alloc(2*t*sizeof(*bch->cache), &err);
1332 bch->elp = bch_alloc((t+1)*sizeof(struct gf_poly_deg1), &err);
1333
1334 for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++)
1335 bch->poly_2t[i] = bch_alloc(GF_POLY_SZ(2*t), &err);
1336
1337 if (err)
1338 goto fail;
1339
1340 err = build_gf_tables(bch, prim_poly);
1341 if (err)
1342 goto fail;
1343
1344 /* use generator polynomial for computing encoding tables */
1345 genpoly = compute_generator_polynomial(bch);
1346 if (genpoly == NULL)
1347 goto fail;
1348
1349 build_mod8_tables(bch, genpoly);
1350 kfree(genpoly);
1351
1352 err = build_deg2_base(bch);
1353 if (err)
1354 goto fail;
1355
1356 return bch;
1357
1358fail:
1359 free_bch(bch);
1360 return NULL;
1361}
1362EXPORT_SYMBOL_GPL(init_bch);
1363
1364/**
1365 * free_bch - free the BCH control structure
1366 * @bch: BCH control structure to release
1367 */
1368void free_bch(struct bch_control *bch)
1369{
1370 unsigned int i;
1371
1372 if (bch) {
1373 kfree(bch->a_pow_tab);
1374 kfree(bch->a_log_tab);
1375 kfree(bch->mod8_tab);
1376 kfree(bch->ecc_buf);
1377 kfree(bch->ecc_buf2);
1378 kfree(bch->xi_tab);
1379 kfree(bch->syn);
1380 kfree(bch->cache);
1381 kfree(bch->elp);
1382
1383 for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++)
1384 kfree(bch->poly_2t[i]);
1385
1386 kfree(bch);
1387 }
1388}
1389EXPORT_SYMBOL_GPL(free_bch);
1390
1391MODULE_LICENSE("GPL");
1392MODULE_AUTHOR("Ivan Djelic <ivan.djelic@parrot.com>");
1393MODULE_DESCRIPTION("Binary BCH encoder/decoder");
diff --git a/utils/rk27utils/nandextract/libbch.h b/utils/rk27utils/nandextract/libbch.h
new file mode 100644
index 0000000000..b4c779e815
--- /dev/null
+++ b/utils/rk27utils/nandextract/libbch.h
@@ -0,0 +1,113 @@
1/*
2 * Generic binary BCH encoding/decoding library
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License version 2 as published by
6 * the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 51
15 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
16 *
17 * Copyright © 2011 Parrot S.A.
18 *
19 * Author: Ivan Djelic <ivan.djelic@parrot.com>
20 *
21 * Description:
22 *
23 * This library provides runtime configurable encoding/decoding of binary
24 * Bose-Chaudhuri-Hocquenghem (BCH) codes.
25*/
26#ifndef _BCH_H
27#define _BCH_H
28
29#include <stdint.h>
30
31#if defined(CONFIG_BCH_CONST_PARAMS)
32#define GF_M(_p) (CONFIG_BCH_CONST_M)
33#define GF_T(_p) (CONFIG_BCH_CONST_T)
34#define GF_N(_p) ((1 << (CONFIG_BCH_CONST_M))-1)
35#else
36#define GF_M(_p) ((_p)->m)
37#define GF_T(_p) ((_p)->t)
38#define GF_N(_p) ((_p)->n)
39#endif
40
41#define BCH_ECC_WORDS(_p) DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 32)
42#define BCH_ECC_BYTES(_p) DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 8)
43
44#ifndef dbg
45#define dbg(_fmt, args...) do {} while (0)
46#endif
47
48/*
49 * represent a polynomial over GF(2^m)
50 */
51struct gf_poly {
52 unsigned int deg; /* polynomial degree */
53 unsigned int c[0]; /* polynomial terms */
54};
55
56/* given its degree, compute a polynomial size in bytes */
57#define GF_POLY_SZ(_d) (sizeof(struct gf_poly)+((_d)+1)*sizeof(unsigned int))
58
59/* polynomial of degree 1 */
60struct gf_poly_deg1 {
61 struct gf_poly poly;
62 unsigned int c[2];
63};
64
65/**
66 * struct bch_control - BCH control structure
67 * @m: Galois field order
68 * @n: maximum codeword size in bits (= 2^m-1)
69 * @t: error correction capability in bits
70 * @ecc_bits: ecc exact size in bits, i.e. generator polynomial degree (<=m*t)
71 * @ecc_bytes: ecc max size (m*t bits) in bytes
72 * @a_pow_tab: Galois field GF(2^m) exponentiation lookup table
73 * @a_log_tab: Galois field GF(2^m) log lookup table
74 * @mod8_tab: remainder generator polynomial lookup tables
75 * @ecc_buf: ecc parity words buffer
76 * @ecc_buf2: ecc parity words buffer
77 * @xi_tab: GF(2^m) base for solving degree 2 polynomial roots
78 * @syn: syndrome buffer
79 * @cache: log-based polynomial representation buffer
80 * @elp: error locator polynomial
81 * @poly_2t: temporary polynomials of degree 2t
82 */
83struct bch_control {
84 unsigned int m;
85 unsigned int n;
86 unsigned int t;
87 unsigned int ecc_bits;
88 unsigned int ecc_bytes;
89/* private: */
90 uint16_t *a_pow_tab;
91 uint16_t *a_log_tab;
92 uint32_t *mod8_tab;
93 uint32_t *ecc_buf;
94 uint32_t *ecc_buf2;
95 unsigned int *xi_tab;
96 unsigned int *syn;
97 int *cache;
98 struct gf_poly *elp;
99 struct gf_poly *poly_2t[4];
100};
101
102struct bch_control *init_bch(int m, int t, unsigned int prim_poly);
103
104void free_bch(struct bch_control *bch);
105
106void encode_bch(struct bch_control *bch, const uint8_t *data,
107 unsigned int len, uint8_t *ecc);
108
109int decode_bch(struct bch_control *bch, const uint8_t *data, unsigned int len,
110 const uint8_t *recv_ecc, const uint8_t *calc_ecc,
111 const unsigned int *syn, unsigned int *errloc);
112
113#endif /* _BCH_H */
diff --git a/utils/rk27utils/nandextract/nandextract.c b/utils/rk27utils/nandextract/nandextract.c
new file mode 100644
index 0000000000..c9b41d26e3
--- /dev/null
+++ b/utils/rk27utils/nandextract/nandextract.c
@@ -0,0 +1,235 @@
1#include <stdio.h>
2#include <stdint.h>
3#include <stdbool.h>
4#include <stdlib.h>
5#include <string.h>
6#include "libbch.h"
7
8#define SECTOR_DATA_SIZE 512
9#define SECTOR_META_SIZE 3
10#define SECTOR_ECC_SIZE 13
11#define SECTOR_SIZE (SECTOR_DATA_SIZE + SECTOR_META_SIZE + SECTOR_ECC_SIZE)
12
13/* scramble mode */
14enum {
15 CONTINOUS_ENC, /* scramble whole block at once */
16 PAGE_ENC /* nand bootloader is scrambled in 0x200 chunks */
17};
18
19static uint8_t reverse_bits(uint8_t b)
20{
21 return (((b & 0x80) >> 7)|
22 ((b & 0x40) >> 5)|
23 ((b & 0x20) >> 3)|
24 ((b & 0x10) >> 1)|
25 ((b & 0x08) << 1)|
26 ((b & 0x04) << 3)|
27 ((b & 0x02) << 5)|
28 ((b & 0x01) << 7));
29}
30
31static int libbch_decode_sec(struct bch_control *bch, uint8_t *inbuf, uint8_t *outbuf)
32{
33 unsigned int errloc[8];
34 static const uint8_t mask[13] = {
35 0x4e, 0x8c, 0x9d, 0x52,
36 0x2d, 0x6c, 0x7c, 0xcb,
37 0xc3, 0x12, 0x14, 0x19,
38 0x37,
39 };
40
41 int i, err_num = 0;
42
43 /* ecc masking polynomial */
44 for (i=0; i<SECTOR_ECC_SIZE; i++)
45 inbuf[SECTOR_DATA_SIZE+SECTOR_META_SIZE+i] ^= mask[i];
46
47 /* fix ordering of input bits */
48 for (i = 0; i < SECTOR_SIZE; i++)
49 inbuf[i] = reverse_bits(inbuf[i]);
50
51 err_num = decode_bch(bch, inbuf,
52 (SECTOR_SIZE - SECTOR_ECC_SIZE),
53 &inbuf[SECTOR_SIZE - SECTOR_ECC_SIZE],
54 NULL, NULL, errloc);
55
56 /* apply fixups */
57 for(i=0; i<err_num; i++)
58 inbuf[errloc[i]/8] ^= 1 << (errloc[i] % 8);
59
60 /* reverse bits back (data part only), remining bytes are scratched */
61 for (i = 0; i < SECTOR_DATA_SIZE; i++)
62 outbuf[i] = reverse_bits(inbuf[i]);
63
64 return err_num;
65}
66
67/* scrambling/descrambling reverse engineered by AleMaxx */
68static void encode_page(uint8_t *inpg, uint8_t *outpg, const int size)
69{
70
71uint8_t key[] = {
72 0x7C, 0x4E, 0x03, 0x04,
73 0x55, 0x05, 0x09, 0x07,
74 0x2D, 0x2C, 0x7B, 0x38,
75 0x17, 0x0D, 0x17, 0x11
76};
77 int i, i3, x, val, idx;
78
79 uint8_t key1[0x100];
80 uint8_t key2[0x100];
81
82 for (i=0; i<0x100; i++) {
83 key1[i] = i;
84 key2[i] = key[i&0xf];
85 }
86
87 i3 = 0;
88 for (i=0; i<0x100; i++) {
89 x = key1[i];
90 i3 = key1[i] + i3;
91 i3 += key2[i];
92 i3 &= 0xff;
93 key1[i] = key1[i3];
94 key1[i3] = x;
95 }
96
97 idx = 0;
98 for (i=0; i<size; i++) {
99 x = key1[(i+1) & 0xff];
100 val = x;
101 idx = (x + idx) & 0xff;
102 key1[(i+1) & 0xff] = key1[idx];
103 key1[idx] = (x & 0xff);
104 val = (key1[(i+1)&0xff] + x) & 0xff;
105 val = key1[val];
106 outpg[i] = val ^ inpg[i];
107 }
108}
109
110/* returns offset in bytes of the sector
111 * NOTE: bootrom assumes 4 secs per page (regardles of actual pagesize)
112 */
113static int offset(int sec_num, int page_size, int rom)
114{
115 int sec_per_page, page_num, page_offset;
116
117 if (rom)
118 sec_per_page = 4;
119 else
120 sec_per_page = page_size / SECTOR_SIZE;
121
122 page_num = sec_num / sec_per_page;
123 page_offset = sec_num % sec_per_page;
124
125 printf("Sec per page: %d\n", sec_per_page);
126 printf("Page num: %d\n", page_num);
127 printf("Offset in page (sec): %d\n", page_offset);
128 printf("Offset in file (bytes): %d (0x%0x)\n", (page_num * page_size) +
129 (page_offset * SECTOR_SIZE),
130 (page_num * page_size) +
131 (page_offset * SECTOR_SIZE));
132
133 return ((page_num * page_size) + (page_offset * SECTOR_SIZE));
134}
135
136static int sector_read(FILE *fp, void *buff, int sec_num, int nand_page_size, struct bch_control *bch)
137{
138 int ret;
139 int file_offset = offset(sec_num, nand_page_size, 1);
140 uint8_t inbuf[SECTOR_SIZE];
141 uint8_t outbuf[SECTOR_SIZE];
142
143 if (fp == NULL)
144 return -1;
145
146 /* seek to the begining of the data */
147 fseek(fp, file_offset, SEEK_SET);
148
149 /* read into the buffer */
150 ret = fread(inbuf, 1, SECTOR_SIZE, fp);
151
152 if (ret != SECTOR_SIZE)
153 {
154 return -2;
155 }
156
157 ret = libbch_decode_sec(bch, inbuf, outbuf);
158
159 if (ret)
160 {
161 printf("LIBBCH Data %d error(s) in sector %d\n", ret, sec_num);
162 }
163
164 memcpy(buff, outbuf, SECTOR_DATA_SIZE);
165 return ret;
166}
167
168int main (int argc, char **argv)
169{
170 FILE *ifp, *ofp;
171 void *obuf;
172 int i, size, sector, num_sectors, nand_page_size;
173 char *infile, *outfile, *ptr;
174
175 struct bch_control *bch;
176
177 if (argc < 6)
178 {
179 printf("Usage: %s infile outfile start_sector num_sectors nand_page_size\n", argv[0]);
180 return 0;
181 }
182
183 infile = argv[1];
184 outfile = argv[2];
185 sector = atoi(argv[3]);
186 num_sectors = atoi(argv[4]);
187 nand_page_size = atoi(argv[5]);
188
189 size = SECTOR_DATA_SIZE * num_sectors;
190 obuf = malloc(size);
191
192 if (obuf == NULL)
193 {
194 printf("Error allocating %d bytes of buffer\n", size);
195 return -1;
196 }
197
198 ifp = fopen(infile, "rb");
199
200 if (ifp == NULL)
201 {
202 printf("Cannot open %s file\n", infile);
203 free(obuf);
204 return -2;
205 }
206
207 ofp = fopen(outfile, "wb");
208
209 if (ifp == NULL)
210 {
211 printf("Cannot open %s file\n", outfile);
212 fclose(ifp);
213 free(obuf);
214 return -3;
215 }
216
217 bch = init_bch(13, 8, 0x25af);
218
219 ptr = (char *)obuf;
220 for(i=0; i<num_sectors; i++)
221 {
222 sector_read(ifp, ptr, sector++, nand_page_size, bch);
223 encode_page((uint8_t *)ptr, (uint8_t *)ptr, SECTOR_DATA_SIZE);
224 ptr += SECTOR_DATA_SIZE;
225 }
226
227 fwrite(obuf, 1, size, ofp);
228
229 fclose(ifp);
230 fclose(ofp);
231 free(obuf);
232
233 free_bch(bch);
234 return 0;
235}