From 44bb2856a59be53ef5ede154a39c54a59b1cc6d0 Mon Sep 17 00:00:00 2001
From: Amaury Pouly <amaury.pouly@gmail.com>
Date: Fri, 11 Nov 2016 15:40:56 +0100
Subject: nwztools/database: add database of information on Sony NWZ linux
 players

There must be an evil genius in Sony's Walkman division. Someone who made sure
that each model is close enough to the previous one so that little code is needed
but different enough so that an educated guess is not enough.

Each linux-based Sony player has a model ID (mid) which is a 32-bit integer.
I was able to extract a list of all model IDs and the corresponding name of
the player (see README). This gives us 1) a nice list of all players (because
NWZ-A729 vs NWZ-A729B, really Sony?) 2) an easy way to find the name of a player
programmatically. It seems that the lower 8 bits of the model ID give the storage
size but don't bet your life on it. The remaining bytes seem to follow some kind
of pattern but there are exceptions.

From this list, I was able to build a list of all Sony's series (up to quite
recent ones). The only safe way to build that is by hand, with a list of series,
each series having a list of model IDs. The notion of series is very important
because all models in a series share the same firmware.

A very important concept on Sony's players is the NVP, an area of the flash
that stores data associated with keys. The README contains more information but
basically it records the model ID, the destination, the boot flags,
the firmware upgrade flags, the boot image, the DRM keys, and a lot of other stuff.
Of course Sony decided to slightly tweak the index of the keys regularly over time
which means that each series has a potentially different map, and we need this map
to talk to the NVP driver. Fortunately, Sony distributes the kernel for all its
players and they contain a kernel header with this information. I wrote a script
to unpack kernel sources and parse this header, producing a bunch of nw-*.txt
files, included in this commit. This map is very specific though: it maps Sony's
3-letter names (bti) to indexes (1). This is not very useful without the
description (bti = boot image) and its size (262144). This information is harder
to come by, and is only stored in one place: in the icx_nvp_emmc.ko drivers, found
on the device. Fortunately, Sony distributes a number of firmware upgrades that
contain the rootfs, which once extracted contains this driver. The driver is a
standard ELF file with symbols. I wrote a parsing tool (nvptool) that is able
to extract this information from the drivers. Using that, I produced a bunch
of nodes-nw*.txt files. A reasonable assumption is that a node's meaning and
size do not change over time (bti is always the boot image and is always
262144 bytes), so by merging a few of those files, we can get a complete picture
(note that some nodes that existed in older players do not exist anymore so
we really need to merge several files from different generations).

The advantage of storing all this information in plain text files is that it
is now easy to parse and to convert into whatever format we want to use.
I wrote a Python script that parses all this mess and produces a C file and
header with all this information (nwz_db.{c,h}).

Change-Id: Id790581ddd527d64418fe9e4e4df8e0546117b80
---
 utils/nwztools/database/gen_db.py | 317 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 317 insertions(+)
 create mode 100755 utils/nwztools/database/gen_db.py

(limited to 'utils/nwztools/database/gen_db.py')

diff --git a/utils/nwztools/database/gen_db.py b/utils/nwztools/database/gen_db.py
new file mode 100755
index 0000000000..61c08f919a
--- /dev/null
+++ b/utils/nwztools/database/gen_db.py
@@ -0,0 +1,317 @@
+#!/usr/bin/python3
+import glob
+import os
+import re
+import subprocess
+
+# load models.txt: every record is "mid,name"
+g_models = []
+with open('models.txt') as models_file:
+    for record in models_file:
+        # exactly two fields are expected: unpacking raises on anything else
+        model_id, model_name = record.rstrip().split(",")
+        g_models.append({'mid': int(model_id, 0), 'name': model_name})
+# load series.txt: every record is "codename,name,mid,mid,..."
+g_series = []
+with open('series.txt') as series_file:
+    for record in series_file:
+        # split the record once, then pick the fields by position
+        fields = record.rstrip().split(",")
+        entry = {'codename': fields[0], 'name': fields[1]}
+        # "codename,name," leaves a single empty field: treat it as no model at all
+        mids = fields[2:]
+        if mids == [""]:
+            mids = []
+        # model IDs are parsed with base 0, so a 0x prefix selects hexadecimal
+        entry['models'] = [int(mid, 0) for mid in mids]
+        g_series.append(entry)
+# parse all maps in nvp/
+# since most nvps are identical, we compute the md5sum of every map file and
+# group the codenames by (truncated) hash: only one file per hash group is
+# parsed later on, and each codename is mapped to the hash of its group
+g_hash_nvp = dict() # hash -> nvp (built in stages: codename set, codename, table)
+g_nvp_hash = dict() # codename -> hash
+HASH_SIZE=6
+map_files = sorted(glob.glob('nvp/nw*.txt')) # glob order is filesystem-dependent
+for line in subprocess.run(["md5sum"] + map_files, stdout = subprocess.PIPE).stdout.decode("utf-8").split("\n"):
+    if len(line.rstrip()) == 0:
+        continue
+    hash, file = line.rstrip().split()
+    codename = re.search(r'nvp/(.*)\.txt', file).group(1)
+    hash = hash[:HASH_SIZE]
+    # group codenames by hash
+    if not (hash in g_hash_nvp):
+        g_hash_nvp[hash] = set()
+    g_hash_nvp[hash].add(codename)
+    g_nvp_hash[codename] = hash
+# we have some file nodes (nodes-*) but not necessarily for all series
+# so for each hash group, try to find at least one
+for hash in g_hash_nvp:
+    # look at all codenames and see if we can find one with a node file
+    candidates = sorted(g_hash_nvp[hash]) # sorted: set order varies between runs
+    node_codename = ""
+    for codename in candidates:
+        if os.path.isfile("nvp/nodes-%s.txt" % codename):
+            node_codename = codename
+            break
+    # if we didn't find one, we just keep the first one, otherwise keep the one we found
+    if node_codename == "":
+        node_codename = candidates[0]
+    g_hash_nvp[hash] = node_codename
+# for each entry in g_hash_nvp, replace the codename by the actual table
+# that we parse, and compute all nvp names at the same time
+g_nvp_names = set() # set of all nvp names
+g_nvp_desc = dict() # name -> set of all descriptions of a node
+g_nvp_size = dict() # name -> set of all possible sizes of a node
+for hash in g_hash_nvp:
+    codename = g_hash_nvp[hash]
+    # at this point the entry holds the codename chosen for this hash group
+    # parse its map file: one "name,index" pair per line
+    map = dict()
+    with open("nvp/%s.txt" % codename) as fp:
+        for line in fp:
+            # we unpack and repack 1) to make the format obvious 2) to catch errors
+            name,index = line.rstrip().split(",")
+            # convert node to integer but be careful of leading 0 (ie 010 is actually
+            # 10 in decimal, it is not in octal)
+            index = int(index, 10)
+            map[index] = name
+            g_nvp_names.add(name)
+    # parse node map if any: one "index,size,desc" triple per line
+    node_map = dict()
+    if os.path.isfile("nvp/nodes-%s.txt" % codename):
+        with open("nvp/nodes-%s.txt" % codename) as fp:
+            for line in fp:
+                # we unpack and repack 1) to make the format obvious 2) to catch errors
+                index,size,desc = line.rstrip().split(",")
+                # convert node to integer but be careful of leading 0 (ie 010 is actually
+                # 10 in decimal, it is not in octal)
+                index = int(index, 10)
+                desc = desc.rstrip()
+                node_map[index] = {'size': size, 'desc': desc} # size stays a string
+    # compute final nvp (name -> index) and collect descriptions and sizes per name
+    nvp = dict()
+    for index in map:
+        size = 0
+        desc = ""
+        name = map[index]
+        if index in node_map:
+            size = node_map[index]["size"]
+            desc = node_map[index]["desc"]
+        nvp[name] = index
+        if not (name in g_nvp_desc):
+            g_nvp_desc[name] = set()
+        if len(desc) != 0:
+            g_nvp_desc[name].add(desc)
+        if not (name in g_nvp_size):
+            g_nvp_size[name] = set()
+        if size != 0: # NOTE(review): a size read from a nodes file is a str, never equal to 0
+            g_nvp_size[name].add(size)
+    g_hash_nvp[hash] = nvp # the entry now holds the name -> index table
+
+#
+# generate header
+#
+header_begin = \
+"""\
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \\
+ *                     \/            \/     \/    \/            \/
+ *
+ * Copyright (C) 2016 Amaury Pouly
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+#ifndef __NWZ_DB_H__
+#define __NWZ_DB_H__
+
+/** /!\ This file was automatically generated, DO NOT MODIFY IT DIRECTLY /!\ */
+
+/* List of all known NVP nodes */
+enum nwz_nvp_node_t
+{
+"""
+
+header_end = \
+"""\
+    NWZ_NVP_COUNT /* Number of nvp nodes */
+};
+
+/* Invalid NVP index */
+#define NWZ_NVP_INVALID     -1 /* Non-existent entry */
+/* Number of models */
+#define NWZ_MODEL_COUNT     %s
+/* Number of series */
+#define NWZ_SERIES_COUNT    %s
+
+/* NVP node info */
+struct nwz_nvp_info_t
+{
+    const char *name; /* Sony's name: "bti" */
+    unsigned long size; /* Size in bytes */
+    const char *desc; /* Description: "bootloader image" */
+};
+
+/* NVP index map (nwz_nvp_node_t -> index) */
+typedef int nwz_nvp_index_t[NWZ_NVP_COUNT];
+
+/* Model info */
+struct nwz_model_info_t
+{
+    unsigned long mid; /* Model ID: first 4 bytes of the NVP mid entry */
+    const char *name; /* Human name: "NWZ-E463" */
+};
+
+/* Series info */
+struct nwz_series_info_t
+{
+    const char *codename; /* Rockbox codename: nwz-e460 */
+    const char *name; /* Human name: "NWZ-E460 Series" */
+    int mid_count; /* number of entries in mid_list */
+    unsigned long *mid; /* List of model IDs */
+    /* Pointer to a name -> index map, nonexistent entries map to NWZ_NVP_INVALID */
+    nwz_nvp_index_t *nvp_index;
+};
+
+/* List of all NVP entries, indexed by nwz_nvp_node_t */
+extern struct nwz_nvp_info_t nwz_nvp[NWZ_NVP_COUNT];
+/* List of all models, sorted by increasing values of model ID */
+extern struct nwz_model_info_t nwz_model[NWZ_MODEL_COUNT];
+/* List of all series */
+extern struct nwz_series_info_t nwz_series[NWZ_SERIES_COUNT];
+
+#endif /* __NWZ_DB_H__ */
+"""
+
+with open("nwz_db.h", "w") as fp:
+    fp.write(header_begin)
+    # generate list of all nvp nodes
+    for name in sorted(g_nvp_names):
+        # create comment to explain the meaning, gather several meanings together
+        # if there are more than one (sorted, so that the output is reproducible)
+        explain = ""
+        if name in g_nvp_desc:
+            explain = " | ".join(sorted(g_nvp_desc[name]))
+        # overwrite desc set with a single string for later
+        g_nvp_desc[name] = explain
+        fp.write("    NWZ_NVP_%s, /* %s */\n" % (name.upper(), explain))
+    fp.write(header_end % (len(g_models), len(g_series)))
+
+#
+# generate tables
+#
+impl_begin = \
+"""\
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \\
+ *                     \/            \/     \/    \/            \/
+ *
+ * Copyright (C) 2016 Amaury Pouly
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+/** /!\ This file was automatically generated, DO NOT MODIFY IT DIRECTLY /!\ */
+
+#include "nwz_db.h"
+
+struct nwz_model_info_t nwz_model[NWZ_MODEL_COUNT] =
+{
+"""
+
+def by_mid(model): # sort key: the "mid" field of a model entry
+    return model["mid"]
+# sort key: the "name" field of an entry
+def by_name(nvp_entry):
+    return nvp_entry["name"]
+# replace every character that is not an ASCII letter or digit by '_'
+def codename_to_c(codename):
+    return re.sub('[^a-zA-Z0-9]', '_', codename)
+
+with open("nwz_db.c", "w") as fp:
+    fp.write(impl_begin)
+    # generate model list (sort by mid); impl_begin already opened the array
+    for model in sorted(g_models, key = by_mid):
+        fp.write("    { %s, \"%s\" },\n" % (hex(model["mid"]), model["name"]))
+    fp.write("};\n")
+    # generate nvps: one index table per hash group, named after the hash
+    for hash in g_hash_nvp:
+        nvp = g_hash_nvp[hash]
+        fp.write("\nstatic int nvp_index_%s[NWZ_NVP_COUNT] =\n" % hash)
+        fp.write("{\n")
+        for name in sorted(g_nvp_names):
+            index = "NWZ_NVP_INVALID" # used when this map does not have the node
+            if name in nvp:
+                index = nvp[name]
+            fp.write("    [NWZ_NVP_%s] = %s,\n" % (name.upper(), index))
+        fp.write("};\n")
+    # generate nvp info: name, size and description of every node
+    fp.write("\nstruct nwz_nvp_info_t nwz_nvp[NWZ_NVP_COUNT] =\n")
+    fp.write("{\n")
+    for name in sorted(g_nvp_names):
+        size = 0 # emitted when the size is unknown or ambiguous
+        if name in g_nvp_size:
+            size_set = g_nvp_size[name]
+            if len(size_set) == 0:
+                size = 0
+            elif len(size_set) == 1:
+                size = next(iter(size_set)) # the only element of the set
+            else:
+                print("Warning: nvp node \"%s\" has several possible sizes: %s"
+                    % (name, size_set))
+                size = 0
+        desc = ""
+        if name in g_nvp_desc:
+            desc = g_nvp_desc[name] # a single string once nwz_db.h has been written
+        fp.write("    [NWZ_NVP_%s] = { \"%s\", %s, \"%s\" },\n" % (name.upper(),
+            name, size, desc))
+    fp.write("};\n")
+    # generate list of models for each series (a series without model gives "{  }")
+    for series in g_series:
+        c_codename = codename_to_c(series["codename"])
+        list = [hex(mid) for mid in series["models"]]
+        limit = 3 # the first row only takes 3 entries
+        c_list = ""
+        while len(list) != 0:
+            if len(list) <= limit:
+                c_list = c_list + ", ".join(list) # last row: no trailing comma
+                list = []
+            else:
+                c_list = c_list + ", ".join(list[:limit]) + ",\n    "
+                list = list[limit:]
+                limit = 6 # every following row takes up to 6 entries
+        fp.write("\nstatic unsigned long models_%s[] = { %s };\n" % (c_codename, c_list))
+    # generate series list, each entry refers to its models_* and nvp_index_* tables
+    fp.write("\nstruct nwz_series_info_t nwz_series[NWZ_SERIES_COUNT] =\n{\n")
+    for series in g_series:
+        name = series["name"]
+        codename = series["codename"]
+        c_codename = codename_to_c(codename)
+        nvp = "0" # emitted as the nvp_index pointer when no map is known for the codename
+        if codename in g_nvp_hash:
+            nvp = "&nvp_index_%s" % g_nvp_hash[codename]
+        fp.write("    { \"%s\", \"%s\", %s, models_%s, %s },\n" % (codename,
+            name, len(series["models"]), c_codename, nvp))
+    fp.write("};\n")
-- 
cgit v1.2.3