summaryrefslogtreecommitdiff
path: root/utils/nwztools/database/gen_db.py
diff options
context:
space:
mode:
authorAmaury Pouly <amaury.pouly@gmail.com>2016-11-11 15:40:56 +0100
committerAmaury Pouly <amaury.pouly@gmail.com>2016-11-11 16:07:14 +0100
commit44bb2856a59be53ef5ede154a39c54a59b1cc6d0 (patch)
treece343ecff309d6d0172ea56946a9ce679329b250 /utils/nwztools/database/gen_db.py
parent19de536ce2f3c8066ca5be9b570f72e5c1e88342 (diff)
downloadrockbox-44bb2856a59be53ef5ede154a39c54a59b1cc6d0.tar.gz
rockbox-44bb2856a59be53ef5ede154a39c54a59b1cc6d0.zip
nwztools/database: add database of information on Sony NWZ linux players
There must be an evil genius in Sony's Walkman division. Someone who made sure that each model is close enough to the previous one so that little code is needed but different enough so that an educated guess is not enough. Each linux-based Sony player has a model ID (mid) which is a 32-bit integer. I was able to extract a list of all model IDs and the corresponding name of the player (see README). This gives us 1) a nice list of all players (because NWZ-A729 vs NWZ-A729B, really Sony?) 2) an easy way to find the name of a player programmatically. It seems that the lower 8 bits of the model ID give the storage size but don't bet your life on it. The remaining bytes seem to follow some kind of pattern but there are exceptions. From this list, I was able to build a list of all Sony's series (up to quite recent ones). The only safe way to build that is by hand, with a list of series, each series having a list of model IDs. The notion of series is very important because all models in a series share the same firmware. A very important concept on Sony's players is the NVP, an area of the flash that stores data associated with keys. The README contains more information but basically this is where the model ID, the destination, the boot flags, the firmware upgrade flags, the boot image, the DRM keys, and a lot of other stuff are recorded. Of course Sony decided to slightly tweak the index of the keys regularly over time which means that each series has a potentially different map, and we need this map to talk to the NVP driver. Fortunately, Sony distributes the kernel for all its players and they contain a kernel header with this information. I wrote a script to unpack kernel sources and parse this header, producing a bunch of nw-*.txt files, included in this commit. This map is very specific though: it maps Sony's 3-letter names (bti) to indexes (1). This is not very useful without the description (bti = boot image) and its size (262144).
This information is harder to come by, and is only stored in one place: in the icx_nvp_emmc.ko drivers, found on the device. Fortunately, Sony distributes a number of firmware upgrades that contain the rootfs, which once extracted contains this driver. The driver is a standard ELF file with symbols. I wrote a parsing tool (nvptool) that is able to extract this information from the drivers. Using that, I produced a bunch of nodes-nw*.txt files. A reasonable assumption is that the meaning and size of nodes do not change over time (bti is always the boot image and is always 262144 bytes), so by merging a few of those files, we can get a complete picture (note that some nodes that existed in older players do not exist anymore so we really need to merge several ones from different generations). The advantage of storing all this information in plain text files is that it is now easy to parse it and produce whatever format we want. I wrote a python script that parses all this mess and produces a C file and header with all this information (nwz_db.{c,h}). Change-Id: Id790581ddd527d64418fe9e4e4df8e0546117b80
Diffstat (limited to 'utils/nwztools/database/gen_db.py')
-rwxr-xr-xutils/nwztools/database/gen_db.py317
1 files changed, 317 insertions, 0 deletions
diff --git a/utils/nwztools/database/gen_db.py b/utils/nwztools/database/gen_db.py
new file mode 100755
index 0000000000..61c08f919a
--- /dev/null
+++ b/utils/nwztools/database/gen_db.py
@@ -0,0 +1,317 @@
1#!/usr/bin/python3
import glob
import hashlib
import os
import re
import subprocess
6
# Load models.txt: one "mid,name" record per line.
g_models = []
with open('models.txt') as fp:
    for record in fp:
        fields = record.rstrip().split(",")
        # exactly two fields are expected; the unpacking fails loudly
        # on a malformed line, which is how format errors are caught
        mid, name = fields
        # base 0 lets int() honour the prefix of the literal (e.g. 0x...)
        g_models.append(dict(mid=int(mid, 0), name=name))
# Load series.txt: each line is "codename,name[,mid[,mid...]]".
g_series = []
with open('series.txt') as fp:
    for record in fp:
        fields = record.rstrip().split(",")
        # indexing (rather than slicing) makes a too-short line an error
        codename, name = fields[0], fields[1]
        mids = fields[2:]
        # "codename,name," yields a single empty field: treat it as no models
        if mids == [""]:
            mids = []
        g_series.append({
            'codename': codename,
            'name': name,
            'models': [int(mid, 0) for mid in mids],
        })
# Group all maps in nvp/ by content.
# Since most nvps are the same, we compute the MD5 digest of every map file
# and use (a prefix of) it to identify groups of identical maps. Only one file
# per group is parsed later on.
#
# The digest is computed with hashlib rather than by running the external
# `md5sum` program: this works on systems without that tool, cannot block on
# stdin when no file matches, and does not depend on parsing its output.
g_hash_nvp = dict()  # hash -> set of codenames sharing that map content
g_nvp_hash = dict()  # codename -> hash
HASH_SIZE = 6  # number of leading hex digits of the digest that are kept
# sorted so that grouping does not depend on the directory listing order
map_files = sorted(glob.glob('nvp/nw*.txt'))
for map_path in map_files:
    with open(map_path, "rb") as fp:
        digest = hashlib.md5(fp.read()).hexdigest()[:HASH_SIZE]
    # "nvp/<codename>.txt" -> "<codename>"
    codename = os.path.splitext(os.path.basename(map_path))[0]
    g_hash_nvp.setdefault(digest, set()).add(codename)
    g_nvp_hash[codename] = digest
# We have some node files (nvp/nodes-<codename>.txt) but not necessarily for
# all series, so for each hash group pick one representative codename:
# preferably one that has a node file, otherwise any member of the group.
# Candidates are examined in sorted order because sets have no stable
# iteration order; this makes the choice identical from one run to the next.
for digest, codenames in g_hash_nvp.items():
    candidates = sorted(codenames)
    # every group holds at least one codename, so candidates[0] always exists
    node_codename = next(
        (name for name in candidates
         if os.path.isfile("nvp/nodes-%s.txt" % name)),
        candidates[0])
    # replace the set of codenames by the chosen representative
    g_hash_nvp[digest] = node_codename
# For each entry in g_hash_nvp, replace the representative codename by the
# parsed table (name -> index), collecting every node name, description and
# size seen along the way.
g_nvp_names = set()  # set of all nvp names
g_nvp_desc = dict()  # name -> set of all descriptions of a node
g_nvp_size = dict()  # name -> set of all possible sizes of a node
for digest in g_hash_nvp:
    codename = g_hash_nvp[digest]
    # index -> name, read from nvp/<codename>.txt ("name,index" per line)
    index_to_name = dict()
    with open("nvp/%s.txt" % codename) as fp:
        for record in fp:
            # two fields exactly, anything else is a format error
            name, index = record.rstrip().split(",")
            # explicit base 10: a leading 0 (ie 010) means ten, not octal
            index_to_name[int(index, 10)] = name
            g_nvp_names.add(name)
    # index -> {size, desc}, read from the optional node file
    # ("index,size,desc" per line)
    node_map = dict()
    nodes_path = "nvp/nodes-%s.txt" % codename
    if os.path.isfile(nodes_path):
        with open(nodes_path) as fp:
            for record in fp:
                index, size, desc = record.rstrip().split(",")
                # same base 10 rule as above
                node_map[int(index, 10)] = {'size': size, 'desc': desc.rstrip()}
    # compute the final nvp (name -> index) and merge node information
    nvp = dict()
    for index, name in index_to_name.items():
        node = node_map.get(index)
        size = 0 if node is None else node["size"]
        desc = "" if node is None else node["desc"]
        nvp[name] = index
        # make sure every name has an entry, even when nothing is known
        known_descs = g_nvp_desc.setdefault(name, set())
        if len(desc) != 0:
            known_descs.add(desc)
        known_sizes = g_nvp_size.setdefault(name, set())
        if size != 0:
            known_sizes.add(size)
    g_hash_nvp[digest] = nvp
111
112#
113# generate header
114#
# Opening part of the generated nwz_db.h: licence banner, include guard and the
# start of the nwz_nvp_node_t enum. It is written as-is at the top of nwz_db.h,
# and the enum entries are appended right after it.
# NOTE(review): this is not a raw string, yet the banner contains backslash
# sequences (such as "\_", "\|" and "\/") that are not recognized escapes;
# recent Python versions warn about those - consider a raw string.
115header_begin = \
116"""\
117/***************************************************************************
118 * __________ __ ___.
119 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
120 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
121 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
122 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \\
123 * \/ \/ \/ \/ \/
124 *
125 * Copyright (C) 2016 Amaury Pouly
126 *
127 * This program is free software; you can redistribute it and/or
128 * modify it under the terms of the GNU General Public License
129 * as published by the Free Software Foundation; either version 2
130 * of the License, or (at your option) any later version.
131 *
132 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
133 * KIND, either express or implied.
134 *
135 ****************************************************************************/
136#ifndef __NWZ_DB_H__
137#define __NWZ_DB_H__
138
139/** /!\ This file was automatically generated, DO NOT MODIFY IT DIRECTLY /!\ */
140
141/* List of all known NVP nodes */
142enum nwz_nvp_node_t
143{
144"""
145
# Closing part of the generated nwz_db.h: end of the nwz_nvp_node_t enum, the
# count macros, the structure definitions and the extern table declarations.
# This is a %-format template: the two %s placeholders receive, in order, the
# number of models and the number of series when the header is written.
146header_end = \
147"""\
148 NWZ_NVP_COUNT /* Number of nvp nodes */
149};
150
151/* Invalid NVP index */
152#define NWZ_NVP_INVALID -1 /* Non-existent entry */
153/* Number of models */
154#define NWZ_MODEL_COUNT %s
155/* Number of series */
156#define NWZ_SERIES_COUNT %s
157
158/* NVP node info */
159struct nwz_nvp_info_t
160{
161 const char *name; /* Sony's name: "bti" */
162 unsigned long size; /* Size in bytes */
163 const char *desc; /* Description: "bootloader image" */
164};
165
166/* NVP index map (nwz_nvp_node_t -> index) */
167typedef int nwz_nvp_index_t[NWZ_NVP_COUNT];
168
169/* Model info */
170struct nwz_model_info_t
171{
172 unsigned long mid; /* Model ID: first 4 bytes of the NVP mid entry */
173 const char *name; /* Human name: "NWZ-E463" */
174};
175
176/* Series info */
177struct nwz_series_info_t
178{
179 const char *codename; /* Rockbox codename: nwz-e460 */
180 const char *name; /* Human name: "NWZ-E460 Series" */
181 int mid_count; /* number of entries in mid_list */
182 unsigned long *mid; /* List of model IDs */
183 /* Pointer to a name -> index map, nonexistent entries map to NWZ_NVP_INVALID */
184 nwz_nvp_index_t *nvp_index;
185};
186
187/* List of all NVP entries, indexed by nwz_nvp_node_t */
188extern struct nwz_nvp_info_t nwz_nvp[NWZ_NVP_COUNT];
189/* List of all models, sorted by increasing values of model ID */
190extern struct nwz_model_info_t nwz_model[NWZ_MODEL_COUNT];
191/* List of all series */
192extern struct nwz_series_info_t nwz_series[NWZ_SERIES_COUNT];
193
194#endif /* __NWZ_DB_H__ */
195"""
196
# Write nwz_db.h: the fixed opening text, one enum entry per NVP node name
# (in sorted order), then the closing text with the model and series counts.
# Side effect: each g_nvp_desc[name] set is replaced by a single string.
with open("nwz_db.h", "w") as fp:
    fp.write(header_begin)
    # generate list of all nvp nodes
    for name in sorted(g_nvp_names):
        # create comment to explain the meaning, gather several meanings
        # together if there are more than one
        explain = ""
        if name in g_nvp_desc:
            # a set has no stable iteration order: sort the descriptions so
            # that the generated header is identical from one run to the next
            explain = " | ".join(sorted(g_nvp_desc[name]))
            # overwrite desc set with a single string for later
            g_nvp_desc[name] = explain
        fp.write(" NWZ_NVP_%s, /* %s */\n" % (name.upper(), explain))
    fp.write(header_end % (len(g_models), len(g_series)))
210
211#
212# generate tables
213#
# Opening part of the generated nwz_db.c: licence banner, the include of
# nwz_db.h and the start of the nwz_model table, whose entries are appended
# right after it.
# NOTE(review): this is not a raw string, yet the banner contains backslash
# sequences (such as "\_", "\|" and "\/") that are not recognized escapes;
# recent Python versions warn about those - consider a raw string.
214impl_begin = \
215"""\
216/***************************************************************************
217 * __________ __ ___.
218 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
219 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
220 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
221 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \\
222 * \/ \/ \/ \/ \/
223 *
224 * Copyright (C) 2016 Amaury Pouly
225 *
226 * This program is free software; you can redistribute it and/or
227 * modify it under the terms of the GNU General Public License
228 * as published by the Free Software Foundation; either version 2
229 * of the License, or (at your option) any later version.
230 *
231 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
232 * KIND, either express or implied.
233 *
234 ****************************************************************************/
235
236/** /!\ This file was automatically generated, DO NOT MODIFY IT DIRECTLY /!\ */
237
238#include "nwz_db.h"
239
240struct nwz_model_info_t nwz_model[NWZ_MODEL_COUNT] =
241{
242"""
243
def by_mid(model):
    """Sort key: return the value stored under the "mid" key of *model*."""
    mid = model["mid"]
    return mid
246
def by_name(nvp_entry):
    """Sort key: return the value stored under the "name" key of *nvp_entry*."""
    name = nvp_entry["name"]
    return name
249
def codename_to_c(codename):
    """Turn a series codename into a valid piece of a C identifier.

    Every character that is not an ASCII letter or digit is replaced by an
    underscore, e.g. "nwz-e460" becomes "nwz_e460".
    """
    # All occurrences are replaced (the default); the count is deliberately
    # not passed positionally since that form is deprecated in recent Python.
    return re.sub(r'[^a-zA-Z0-9]', '_', codename)
252
def _format_mid_list(mids):
    """Return the model IDs as the body of a C array initializer.

    IDs are written in hexadecimal, three on the first row and six on each
    following row; an empty list gives an empty string.
    """
    hex_mids = [hex(mid) for mid in mids]
    rows = [hex_mids[:3]]
    rows.extend(hex_mids[start:start + 6]
                for start in range(3, len(hex_mids), 6))
    return ",\n ".join(", ".join(row) for row in rows)


# Write nwz_db.c: the model table, one index table per distinct NVP map,
# the NVP node info table, the per-series model ID lists and the series table.
with open("nwz_db.c", "w") as fp:
    fp.write(impl_begin)
    # generate model list (sort by mid)
    for model in sorted(g_models, key = by_mid):
        fp.write(" { %s, \"%s\" },\n" % (hex(model["mid"]), model["name"]))
    fp.write("};\n")
    # generate nvps; hashes are visited in sorted order so that the order of
    # the tables does not depend on how the maps happened to be discovered
    for digest in sorted(g_hash_nvp):
        nvp = g_hash_nvp[digest]
        fp.write("\nstatic int nvp_index_%s[NWZ_NVP_COUNT] =\n" % digest)
        fp.write("{\n")
        for name in sorted(g_nvp_names):
            # nodes that this map does not define are marked invalid
            index = nvp.get(name, "NWZ_NVP_INVALID")
            fp.write(" [NWZ_NVP_%s] = %s,\n" % (name.upper(), index))
        fp.write("};\n")
    # generate nvp info
    fp.write("\nstruct nwz_nvp_info_t nwz_nvp[NWZ_NVP_COUNT] =\n")
    fp.write("{\n")
    for name in sorted(g_nvp_names):
        # a size is only emitted when exactly one value is known for the node
        size = 0
        size_set = g_nvp_size.get(name, ())
        if len(size_set) == 1:
            size = next(iter(size_set))
        elif len(size_set) > 1:
            print("Warning: nvp node \"%s\" has several possible sizes: %s"
                % (name, size_set))
        desc = g_nvp_desc.get(name, "")
        fp.write(" [NWZ_NVP_%s] = { \"%s\", %s, \"%s\" },\n" % (name.upper(),
            name, size, desc))
    fp.write("};\n")
    # generate list of models for each series
    for series in g_series:
        c_codename = codename_to_c(series["codename"])
        c_list = _format_mid_list(series["models"])
        fp.write("\nstatic unsigned long models_%s[] = { %s };\n" % (c_codename, c_list))
    # generate series list
    fp.write("\nstruct nwz_series_info_t nwz_series[NWZ_SERIES_COUNT] =\n{\n")
    for series in g_series:
        name = series["name"]
        codename = series["codename"]
        c_codename = codename_to_c(codename)
        # series without a known NVP map get a null pointer
        nvp = "0"
        if codename in g_nvp_hash:
            nvp = "&nvp_index_%s" % g_nvp_hash[codename]
        fp.write(" { \"%s\", \"%s\", %s, models_%s, %s },\n" % (codename,
            name, len(series["models"]), c_codename, nvp))
    fp.write("};\n")