codecs: m4a: improve seek accuracy

Seeking doesn't work well in M4A files with very few chunks due to the seek method used (chunk-based, using the info in the 'stco' atom). According to libm4a/demux.c the expected seek resolution using this method is 1/4 to 1/2 second. However, ffmpeg generates files with a 1 megabyte chunk size, so the resolution is much worse than expected on some files: around 30-40 seconds at 256 kbps.

There was a bug with the seek position reported back to Rockbox: the codec pretended it could seek exactly to the requested sample, but it would only seek to the start of a chunk. This could leave the UI in a confusing state because the real playback position was different from what the elapsed time showed. Fix this by recalculating the reported sample position using the chunk start.

To fix the low seek accuracy, use the table in the 'stsz' atom to skip individual packets within a chunk. This is very accurate, but it takes a lot of RAM to allocate the table. Currently the table is not allowed to use more than half of the codec RAM, which should suffice for short files on most targets. On files where the table is too large, the codec will fall back to the less accurate chunk-based seek method.

Change-Id: Ide38ea846c1cdd69691e9b1e1cd87eb0fa11cf78
author: Aidan MacDonald <amachronic@protonmail.com> 2022-04-18 14:21:12 +0100
committer: Aidan MacDonald <amachronic@protonmail.com> 2022-05-02 12:16:21 -0400
commit: 4dd3c2b33ec3d181f54cc1bcf5b596401a8cfcbb (patch)
tree: fdccdcaed0d0dc3faea515daad410b5f3b064f02 /lib/rbcodec/codecs/libm4a/m4a.c
parent: b79eefc8583536da9faa87b50d82eaef8a3e0dde (diff)
download: rockbox-4dd3c2b33ec3d181f54cc1bcf5b596401a8cfcbb.tar.gz
rockbox-4dd3c2b33ec3d181f54cc1bcf5b596401a8cfcbb.zip
1 files changed, 96 insertions, 52 deletions
diff --git a/lib/rbcodec/codecs/libm4a/m4a.c b/lib/rbcodec/codecs/libm4a/m4a.c
index 5fe778ac03..b967e15e7a 100644
--- a/lib/rbcodec/codecs/libm4a/m4a.c
+++ b/lib/rbcodec/codecs/libm4a/m4a.c
@@ -23,6 +23,13 @@
 #include <inttypes.h>
 #include "m4a.h"
+#undef DEBUGF
+#if defined(DEBUG)
+#define DEBUGF stream->ci->debugf
+#else
+#define DEBUGF(...)
+#endif
 /* Implementation of the stream.h functions used by libalac */
 #define _Swap32(v) do { \
@@ -127,76 +134,113 @@ int m4a_check_sample_offset(demux_res_t *demux_res, uint32_t frame, uint32_t *st
    return demux_res->lookup_table[i].offset;
 }
-/* Find the exact or preceding frame in lookup_table[]. Return both frame
- * and byte position of this match. */
-static void gather_offset(demux_res_t *demux_res, uint32_t *frame, uint32_t *offset)
-{
-    uint32_t i = 0;
-    for (i=0; i<demux_res->num_lookup_table; ++i)
-    {
-        if (demux_res->lookup_table[i].offset == 0)
-            break;
-        if (demux_res->lookup_table[i].sample > *frame)
-            break;
-    }
-    i = (i>0) ? i-1 : 0; /* We want the last chunk _before_ *frame. */
-    *frame  = demux_res->lookup_table[i].sample;
-    *offset = demux_res->lookup_table[i].offset;
-}
 /* Seek to desired sound sample location. Return 1 on success (and modify
- * sound_samples_done and current_sample), 0 if failed.
+ * sound_samples_done and current_sample), 0 if failed. */
- *
- * Find the sample (=frame) that contains the given sound sample, find a best
- * fit for this sample in the lookup_table[], seek to the byte position. */
 unsigned int m4a_seek(demux_res_t* demux_res, stream_t* stream, 
    uint32_t sound_sample_loc, uint32_t* sound_samples_done, 
    int* current_sample)
 {
-    uint32_t i = 0;
+    uint32_t i, sample_i, sound_sample_i;
-    uint32_t tmp_var, tmp_cnt, tmp_dur;
+    uint32_t time, time_cnt, time_dur;
-    uint32_t new_sample = 0;       /* Holds the amount of chunks/frames. */
+    uint32_t chunk, chunk_first_sample;
-    uint32_t new_sound_sample = 0; /* Sums up total amount of samples. */
+    uint32_t offset;
-    uint32_t new_pos;              /* Holds the desired chunk/frame index. */
+    time_to_sample_t *tts_tab = demux_res->time_to_sample;
+    sample_offset_t *tco_tab = demux_res->lookup_table;
-    /* First check we have the appropriate metadata - we should always
+    uint32_t *tsz_tab = demux_res->sample_byte_sizes;
-     * have it.
-     */
+    /* First check we have the required metadata - we should always have it. */
    if (!demux_res->num_time_to_samples || !demux_res->num_sample_byte_sizes)
-    { 
+    {
-        return 0; 
+        return 0;
    }
-    /* Find the destination block from time_to_sample array */
+    /* The 'sound_sample_loc' we have is PCM-based and not directly usable.
-    time_to_sample_t *tab = demux_res->time_to_sample;
+     * We need to convert it to an MP4 sample number 'sample_i' first. */
-    while (i < demux_res->num_time_to_samples)
+    sample_i = sound_sample_i = 0;
+    for (time = 0; time < demux_res->num_time_to_samples; ++time)
    {
-        tmp_cnt = tab[i].sample_count;
+        time_cnt = tts_tab[time].sample_count;
-        tmp_dur = tab[i].sample_duration;
+        time_dur = tts_tab[time].sample_duration;
-        tmp_var = tmp_cnt * tmp_dur;
+        uint32_t time_var = time_cnt * time_dur;
-        if (sound_sample_loc <= new_sound_sample + tmp_var)
+        if (sound_sample_loc < sound_sample_i + time_var)
        {
-            tmp_var = (sound_sample_loc - new_sound_sample);
+            time_var = sound_sample_loc - sound_sample_i;
-            new_sample       += tmp_var / tmp_dur;
+            sample_i += time_var / time_dur;
-            new_sound_sample += tmp_var;
            break;
        }
-        new_sample       += tmp_cnt;
-        new_sound_sample += tmp_var;
+        sample_i       += time_cnt;
-        ++i;
+        sound_sample_i += time_var;
+    }
+    /* Find the chunk after 'sample_i'. */
+    for (chunk = 1; chunk < demux_res->num_lookup_table; ++chunk)
+    {
+        if (tco_tab[chunk].offset == 0)
+            break;
+        if (tco_tab[chunk].sample > sample_i)
+            break;
    }
-    /* We know the new sample (=frame), now calculate the file position. */
+    /* The preceding chunk is the one that contains 'sample_i'. */
-    gather_offset(demux_res, &new_sample, &new_pos);
+    chunk--;
+    chunk_first_sample = tco_tab[chunk].sample;
+    offset = tco_tab[chunk].offset;
-    /* We know the new file position, so let's try to seek to it */
+    /* Compute the PCM sample number of the chunk's first sample
-    if (stream->ci->seek_buffer(new_pos))
+     * to get an accurate base for sound_sample_i. */
+    i = sound_sample_i = 0;
+    for (time = 0; time < demux_res->num_time_to_samples; ++time)
    {
-        *sound_samples_done = new_sound_sample;
+        time_cnt = tts_tab[time].sample_count;
-        *current_sample = new_sample;
+        time_dur = tts_tab[time].sample_duration;
+        if (chunk_first_sample < i + time_cnt)
+        {
+            sound_sample_i += (chunk_first_sample - i) * time_dur;
+            break;
+        }
+        i += time_cnt;
+        sound_sample_i += time_cnt * time_dur;
+    }
+    DEBUGF("seek chunk=%lu, sample=%lu, soundsample=%lu, offset=%lu\n",
+           (unsigned long)chunk, (unsigned long)chunk_first_sample,
+           (unsigned long)sound_sample_i, (unsigned long)offset);
+    if (tsz_tab) {
+        /* We have a sample-to-bytes table available so we can do accurate
+         * seeking. Move one sample at a time and update the file offset and
+         * PCM sample offset as we go. */
+        for (i = chunk_first_sample;
+             i < sample_i && i < demux_res->num_sample_byte_sizes; ++i)
+        {
+            /* this could be unnecessary */
+            if (time_cnt == 0 && ++time < demux_res->num_time_to_samples)
+            {
+                time_cnt = tts_tab[time].sample_count;
+                time_dur = tts_tab[time].sample_duration;
+            }
+            offset += tsz_tab[i];
+            sound_sample_i += time_dur;
+            time_cnt--;
+        }
+    } else {
+        /* No sample-to-bytes table available so we can only seek to the
+         * start of a chunk, which is often much lower resolution. */
+        sample_i = chunk_first_sample;
+    }
+    if (stream->ci->seek_buffer(offset))
+    {
+        *sound_samples_done = sound_sample_i;
+        *current_sample = sample_i;
        return 1;
    }
-    
    return 0;
 }
author	Aidan MacDonald <amachronic@protonmail.com>	2022-04-18 14:21:12 +0100
committer	Aidan MacDonald <amachronic@protonmail.com>	2022-05-02 12:16:21 -0400
commit	4dd3c2b33ec3d181f54cc1bcf5b596401a8cfcbb (patch)
tree	fdccdcaed0d0dc3faea515daad410b5f3b064f02 /lib/rbcodec/codecs/libm4a/m4a.c
parent	b79eefc8583536da9faa87b50d82eaef8a3e0dde (diff)
download	rockbox-4dd3c2b33ec3d181f54cc1bcf5b596401a8cfcbb.tar.gz rockbox-4dd3c2b33ec3d181f54cc1bcf5b596401a8cfcbb.zip

diff --git a/lib/rbcodec/codecs/libm4a/m4a.c b/lib/rbcodec/codecs/libm4a/m4a.c index 5fe778ac03..b967e15e7a 100644 --- a/lib/rbcodec/codecs/libm4a/m4a.c +++ b/lib/rbcodec/codecs/libm4a/m4a.c
@@ -23,6 +23,13 @@
23	#include <inttypes.h>	23	#include <inttypes.h>
24	#include "m4a.h"	24	#include "m4a.h"
25		25
		26	#undef DEBUGF
		27	#if defined(DEBUG)
		28	#define DEBUGF stream->ci->debugf
		29	#else
		30	#define DEBUGF(...)
		31	#endif
		32
26	/* Implementation of the stream.h functions used by libalac */	33	/* Implementation of the stream.h functions used by libalac */
27		34
28	#define _Swap32(v) do { \	35	#define _Swap32(v) do { \
@@ -127,76 +134,113 @@ int m4a_check_sample_offset(demux_res_t *demux_res, uint32_t frame, uint32_t *st
127	return demux_res->lookup_table[i].offset;	134	return demux_res->lookup_table[i].offset;
128	}	135	}
129		136
130	/* Find the exact or preceding frame in lookup_table[]. Return both frame
131	* and byte position of this match. */
132	static void gather_offset(demux_res_t *demux_res, uint32_t *frame, uint32_t *offset)
133	{
134	uint32_t i = 0;
135	for (i=0; i<demux_res->num_lookup_table; ++i)
136	{
137	if (demux_res->lookup_table[i].offset == 0)
138	break;
139	if (demux_res->lookup_table[i].sample > *frame)
140	break;
141	}
142	i = (i>0) ? i-1 : 0; /* We want the last chunk _before_ *frame. */
143	*frame = demux_res->lookup_table[i].sample;
144	*offset = demux_res->lookup_table[i].offset;
145	}
146
147	/* Seek to desired sound sample location. Return 1 on success (and modify	137	/* Seek to desired sound sample location. Return 1 on success (and modify
148	* sound_samples_done and current_sample), 0 if failed.	138	* sound_samples_done and current_sample), 0 if failed. */
149	*
150	* Find the sample (=frame) that contains the given sound sample, find a best
151	* fit for this sample in the lookup_table[], seek to the byte position. */
152	unsigned int m4a_seek(demux_res_t* demux_res, stream_t* stream,	139	unsigned int m4a_seek(demux_res_t* demux_res, stream_t* stream,
153	uint32_t sound_sample_loc, uint32_t* sound_samples_done,	140	uint32_t sound_sample_loc, uint32_t* sound_samples_done,
154	int* current_sample)	141	int* current_sample)
155	{	142	{
156	uint32_t i = 0;	143	uint32_t i, sample_i, sound_sample_i;
157	uint32_t tmp_var, tmp_cnt, tmp_dur;	144	uint32_t time, time_cnt, time_dur;
158	uint32_t new_sample = 0; /* Holds the amount of chunks/frames. */	145	uint32_t chunk, chunk_first_sample;
159	uint32_t new_sound_sample = 0; /* Sums up total amount of samples. */	146	uint32_t offset;
160	uint32_t new_pos; /* Holds the desired chunk/frame index. */	147	time_to_sample_t *tts_tab = demux_res->time_to_sample;
161		148	sample_offset_t *tco_tab = demux_res->lookup_table;
162	/* First check we have the appropriate metadata - we should always	149	uint32_t *tsz_tab = demux_res->sample_byte_sizes;
163	* have it.	150
164	*/	151	/* First check we have the required metadata - we should always have it. */
165	if (!demux_res->num_time_to_samples || !demux_res->num_sample_byte_sizes)	152	if (!demux_res->num_time_to_samples || !demux_res->num_sample_byte_sizes)
166	{	153	{
167	return 0;	154	return 0;
168	}	155	}
169		156
170	/* Find the destination block from time_to_sample array */	157	/* The 'sound_sample_loc' we have is PCM-based and not directly usable.
171	time_to_sample_t *tab = demux_res->time_to_sample;	158	* We need to convert it to an MP4 sample number 'sample_i' first. */
172	while (i < demux_res->num_time_to_samples)	159	sample_i = sound_sample_i = 0;
		160	for (time = 0; time < demux_res->num_time_to_samples; ++time)
173	{	161	{
174	tmp_cnt = tab[i].sample_count;	162	time_cnt = tts_tab[time].sample_count;
175	tmp_dur = tab[i].sample_duration;	163	time_dur = tts_tab[time].sample_duration;
176	tmp_var = tmp_cnt * tmp_dur;	164	uint32_t time_var = time_cnt * time_dur;
177	if (sound_sample_loc <= new_sound_sample + tmp_var)	165
		166	if (sound_sample_loc < sound_sample_i + time_var)
178	{	167	{
179	tmp_var = (sound_sample_loc - new_sound_sample);	168	time_var = sound_sample_loc - sound_sample_i;
180	new_sample += tmp_var / tmp_dur;	169	sample_i += time_var / time_dur;
181	new_sound_sample += tmp_var;
182	break;	170	break;
183	}	171	}
184	new_sample += tmp_cnt;	172
185	new_sound_sample += tmp_var;	173	sample_i += time_cnt;
186	++i;	174	sound_sample_i += time_var;
		175	}
		176
		177	/* Find the chunk after 'sample_i'. */
		178	for (chunk = 1; chunk < demux_res->num_lookup_table; ++chunk)
		179	{
		180	if (tco_tab[chunk].offset == 0)
		181	break;
		182	if (tco_tab[chunk].sample > sample_i)
		183	break;
187	}	184	}
188		185
189	/* We know the new sample (=frame), now calculate the file position. */	186	/* The preceding chunk is the one that contains 'sample_i'. */
190	gather_offset(demux_res, &new_sample, &new_pos);	187	chunk--;
		188	chunk_first_sample = tco_tab[chunk].sample;
		189	offset = tco_tab[chunk].offset;
191		190
192	/* We know the new file position, so let's try to seek to it */	191	/* Compute the PCM sample number of the chunk's first sample
193	if (stream->ci->seek_buffer(new_pos))	192	* to get an accurate base for sound_sample_i. */
		193	i = sound_sample_i = 0;
		194	for (time = 0; time < demux_res->num_time_to_samples; ++time)
194	{	195	{
195	*sound_samples_done = new_sound_sample;	196	time_cnt = tts_tab[time].sample_count;
196	*current_sample = new_sample;	197	time_dur = tts_tab[time].sample_duration;
		198
		199	if (chunk_first_sample < i + time_cnt)
		200	{
		201	sound_sample_i += (chunk_first_sample - i) * time_dur;
		202	break;
		203	}
		204
		205	i += time_cnt;
		206	sound_sample_i += time_cnt * time_dur;
		207	}
		208
		209	DEBUGF("seek chunk=%lu, sample=%lu, soundsample=%lu, offset=%lu\n",
		210	(unsigned long)chunk, (unsigned long)chunk_first_sample,
		211	(unsigned long)sound_sample_i, (unsigned long)offset);
		212
		213	if (tsz_tab) {
		214	/* We have a sample-to-bytes table available so we can do accurate
		215	* seeking. Move one sample at a time and update the file offset and
		216	* PCM sample offset as we go. */
		217	for (i = chunk_first_sample;
		218	i < sample_i && i < demux_res->num_sample_byte_sizes; ++i)
		219	{
		220	/* this could be unnecessary */
		221	if (time_cnt == 0 && ++time < demux_res->num_time_to_samples)
		222	{
		223	time_cnt = tts_tab[time].sample_count;
		224	time_dur = tts_tab[time].sample_duration;
		225	}
		226
		227	offset += tsz_tab[i];
		228	sound_sample_i += time_dur;
		229	time_cnt--;
		230	}
		231	} else {
		232	/* No sample-to-bytes table available so we can only seek to the
		233	* start of a chunk, which is often much lower resolution. */
		234	sample_i = chunk_first_sample;
		235	}
		236
		237	if (stream->ci->seek_buffer(offset))
		238	{
		239	*sound_samples_done = sound_sample_i;
		240	*current_sample = sample_i;
197	return 1;	241	return 1;
198	}	242	}
199		243
200	return 0;	244	return 0;
201	}	245	}
202		246