summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDominik Riebeling <Dominik.Riebeling@gmail.com>2010-02-28 20:19:54 +0000
committerDominik Riebeling <Dominik.Riebeling@gmail.com>2010-02-28 20:19:54 +0000
commit495edfb7b95994ed44a5ebb4ef1fca716d1254d4 (patch)
tree795cc85d9979a3545b270c747c19601d84ac3b3d
parentc1689ca80d3f43ca079062864fe0d9c2c32ff99b (diff)
downloadrockbox-495edfb7b95994ed44a5ebb4ef1fca716d1254d4.tar.gz
rockbox-495edfb7b95994ed44a5ebb4ef1fca716d1254d4.zip
Add support for the OS X Speech Synthesis Manager TTS system.
This adds basic support for using the OS X TTS system. The current implementation only supports selecting the voice itself; no further settings (like pitch / speed adjustments) are implemented. As the OS X TTS system wants the strings to be spoken in an 8 bit encoding, problems with some locale combinations are possible. For this, better error handling in the rbutil TTS is needed. The voice test button in the configuration dialog reacts pretty slowly due to the way its speaking is done. This can be changed but also requires adjustments in the rbutil TTS system. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@24979 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--rbutil/rbutilqt/base/ttsbase.cpp14
-rw-r--r--rbutil/rbutilqt/base/ttscarbon.cpp405
-rw-r--r--rbutil/rbutilqt/base/ttscarbon.h72
-rw-r--r--rbutil/rbutilqt/rbutilqt.pro4
4 files changed, 494 insertions, 1 deletions
diff --git a/rbutil/rbutilqt/base/ttsbase.cpp b/rbutil/rbutilqt/base/ttsbase.cpp
index 1f4060fc72..d68c1816ab 100644
--- a/rbutil/rbutilqt/base/ttsbase.cpp
+++ b/rbutil/rbutilqt/base/ttsbase.cpp
@@ -22,6 +22,9 @@
22#include "ttsfestival.h" 22#include "ttsfestival.h"
23#include "ttssapi.h" 23#include "ttssapi.h"
24#include "ttsexes.h" 24#include "ttsexes.h"
25#if defined(Q_OS_MACX)
26#include "ttscarbon.h"
27#endif
25 28
26// list of tts names and identifiers 29// list of tts names and identifiers
27QMap<QString,QString> TTSBase::ttsList; 30QMap<QString,QString> TTSBase::ttsList;
@@ -43,6 +46,9 @@ void TTSBase::initTTSList()
43#if defined(Q_OS_LINUX) 46#if defined(Q_OS_LINUX)
44 ttsList["festival"] = "Festival TTS Engine"; 47 ttsList["festival"] = "Festival TTS Engine";
45#endif 48#endif
49#if defined(Q_OS_MACX)
50 ttsList["carbon"] = "OS X System Engine";
51#endif
46} 52}
47 53
48// function to get a specific encoder 54// function to get a specific encoder
@@ -66,6 +72,14 @@ TTSBase* TTSBase::getTTS(QObject* parent,QString ttsName)
66 } 72 }
67 else 73 else
68#endif 74#endif
75#if defined(Q_OS_MACX)
76 if(ttsName == "carbon")
77 {
78 tts = new TTSCarbon(parent);
79 return tts;
80 }
81 else
82#endif
69 if (true) // fix for OS other than WIN or LINUX 83 if (true) // fix for OS other than WIN or LINUX
70 { 84 {
71 tts = new TTSExes(ttsName,parent); 85 tts = new TTSExes(ttsName,parent);
diff --git a/rbutil/rbutilqt/base/ttscarbon.cpp b/rbutil/rbutilqt/base/ttscarbon.cpp
new file mode 100644
index 0000000000..b8259a374b
--- /dev/null
+++ b/rbutil/rbutilqt/base/ttscarbon.cpp
@@ -0,0 +1,405 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 *
9 * Copyright (C) 2010 by Dominik Riebeling
10 * $Id$
11 *
12 * All files in this archive are subject to the GNU General Public License.
13 * See the file COPYING in the source tree root for full license agreement.
14 *
15 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 * KIND, either express or implied.
17 *
18 ****************************************************************************/
19
20#include <QtCore>
21#include "ttsbase.h"
22#include "ttscarbon.h"
23#include "encttssettings.h"
24#include "rbsettings.h"
25
26#include <CoreFoundation/CoreFoundation.h>
27#include <Carbon/Carbon.h>
28#include <unistd.h>
29#include <sys/stat.h>
30#include <inttypes.h>
31
32TTSCarbon::TTSCarbon(QObject* parent) : TTSBase(parent)
33{
34}
35
36
37bool TTSCarbon::configOk()
38{
39 return true;
40}
41
42
43bool TTSCarbon::start(QString *errStr)
44{
45 (void)errStr;
46 VoiceSpec vspec;
47 VoiceSpec* vspecref;
48 VoiceDescription vdesc;
49 OSErr error;
50 QString selectedVoice
51 = RbSettings::subValue("carbon", RbSettings::TtsVoice).toString();
52 SInt16 numVoices;
53 SInt16 voiceIndex;
54 error = CountVoices(&numVoices);
55 for(voiceIndex = 1; voiceIndex < numVoices; ++voiceIndex) {
56 error = GetIndVoice(voiceIndex, &vspec);
57 error = GetVoiceDescription(&vspec, &vdesc, sizeof(vdesc));
58 // name is pascal string, i.e. the first byte is the length.
59 QString name = QString::fromLocal8Bit((const char*)&vdesc.name[1],
60 vdesc.name[0]);
61 if(name == selectedVoice) {
62 vspecref = &vspec;
63 if(vdesc.script != -1)
64 m_voiceScript = (CFStringBuiltInEncodings)vdesc.script;
65 else
66 m_voiceScript = (CFStringBuiltInEncodings)vdesc.reserved[0];
67 break;
68 }
69 }
70 if(voiceIndex == numVoices) {
71 // voice not found. Add user notification here and proceed with
72 // system default voice.
73 qDebug() << "selected voice not found, using system default!";
74 vspecref = NULL;
75 GetVoiceDescription(&vspec, &vdesc, sizeof(vdesc));
76 if(vdesc.script != -1)
77 m_voiceScript = (CFStringBuiltInEncodings)vdesc.script;
78 else
79 m_voiceScript = (CFStringBuiltInEncodings)vdesc.reserved[0];
80 }
81
82 error = NewSpeechChannel(vspecref, &m_channel);
83 //SetSpeechInfo(channel, soSpeechDoneCallBack, speechDone);
84 return (error == 0) ? true : false;
85}
86
87
88bool TTSCarbon::stop(void)
89{
90 DisposeSpeechChannel(m_channel);
91 return true;
92}
93
94
95void TTSCarbon::generateSettings(void)
96{
97 QStringList voiceNames;
98 QString systemVoice;
99 SInt16 numVoices;
100 OSErr error;
101 VoiceSpec vspec;
102 VoiceDescription vdesc;
103
104 // get system voice
105 error = GetVoiceDescription(NULL, &vdesc, sizeof(vdesc));
106 systemVoice
107 = QString::fromLocal8Bit((const char*)&vdesc.name[1], vdesc.name[0]);
108 // get list of all voices
109 CountVoices(&numVoices);
110 for(SInt16 i = 1; i < numVoices; ++i) {
111 error = GetIndVoice(i, &vspec);
112 error = GetVoiceDescription(&vspec, &vdesc, sizeof(vdesc));
113 // name is pascal string, i.e. the first byte is the length.
114 QString name
115 = QString::fromLocal8Bit((const char*)&vdesc.name[1], vdesc.name[0]);
116 voiceNames.append(name.trimmed());
117 }
118 // voice
119 EncTtsSetting* setting;
120 QString voice
121 = RbSettings::subValue("carbon", RbSettings::TtsVoice).toString();
122 if(voice.isEmpty())
123 voice = systemVoice;
124 setting = new EncTtsSetting(this, EncTtsSetting::eSTRINGLIST,
125 tr("Voice:"), voice, voiceNames, EncTtsSetting::eNOBTN);
126 insertSetting(ConfigVoice, setting);
127
128}
129
130
131void TTSCarbon::saveSettings(void)
132{
133 // save settings in user config
134 RbSettings::setSubValue("carbon", RbSettings::TtsVoice,
135 getSetting(ConfigVoice)->current().toString());
136 RbSettings::sync();
137}
138
139
140/** @brief create wav file from text using the selected TTS voice.
141 */
142TTSStatus TTSCarbon::voice(QString text, QString wavfile, QString* errStr)
143{
144 TTSStatus status = NoError;
145 OSErr error;
146
147 QString aifffile = wavfile + ".aiff";
148 // FIXME: find out why we need to do this.
149 // Create a local copy of the temporary file filename.
150 // Not doing so causes weird issues (path contains trailing spaces)
151 unsigned int len = aifffile.size() + 1;
152 char* tmpfile = (char*)malloc(len * sizeof(char));
153 strncpy(tmpfile, aifffile.toLocal8Bit().constData(), len);
154 CFStringRef tmpfileref = CFStringCreateWithCString(kCFAllocatorDefault,
155 tmpfile, kCFStringEncodingUTF8);
156 CFURLRef urlref = CFURLCreateWithFileSystemPath(kCFAllocatorDefault,
157 tmpfileref, kCFURLPOSIXPathStyle, false);
158 SetSpeechInfo(m_channel, soOutputToFileWithCFURL, urlref);
159
160 // speak it.
161 // Convert the string to the encoding requested by the voice. Do this
162 // via CFString, as this allows to directly use the destination encoding
163 // as CFString uses the same values as the voice.
164
165 // allocate enough space to allow storing the string in a 2 byte encoding
166 unsigned int textlen = 2 * text.length() + 1;
167 char* textbuf = (char*)calloc(textlen, sizeof(char));
168 char* utf8data = (char*)text.toUtf8().constData();
169 int utf8bytes = text.toUtf8().size();
170 CFStringRef cfstring = CFStringCreateWithBytes(kCFAllocatorDefault,
171 (UInt8*)utf8data, utf8bytes,
172 kCFStringEncodingUTF8, (Boolean)false);
173 CFIndex usedBuf = 0;
174 CFRange range;
175 range.location = 0; // character in string to start.
176 range.length = text.length(); // number of _characters_ in string
177 // FIXME: check if converting between encodings was lossless.
178 CFStringGetBytes(cfstring, range, m_voiceScript, ' ',
179 false, (UInt8*)textbuf, textlen, &usedBuf);
180
181 error = SpeakText(m_channel, textbuf, (unsigned long)usedBuf);
182 while(SpeechBusy()) {
183 // FIXME: add small delay here to make calls less frequent
184 QCoreApplication::processEvents();
185 }
186 if(error != 0) {
187 *errStr = tr("Could not voice string");
188 status = FatalError;
189 }
190 free(textbuf);
191 CFRelease(cfstring);
192
193 // convert the temporary aiff file to wav
194 if(status == NoError
195 && convertAiffToWav(tmpfile, wavfile.toLocal8Bit().constData()) != 0) {
196 *errStr = tr("Could not convert intermediate file");
197 status = FatalError;
198 }
199 // remove temporary aiff file
200 unlink(tmpfile);
201 free(tmpfile);
202
203 return status;
204}
205
206
207unsigned long TTSCarbon::be2u32(unsigned char* buf)
208{
209 return (buf[0]&0xff)<<24 | (buf[1]&0xff)<<16 | (buf[2]&0xff)<<8 | (buf[3]&0xff);
210}
211
212
213unsigned long TTSCarbon::be2u16(unsigned char* buf)
214{
215 return buf[1]&0xff | (buf[0]&0xff)<<8;
216}
217
218
219unsigned char* TTSCarbon::u32tobuf(unsigned char* buf, uint32_t val)
220{
221 buf[0] = val & 0xff;
222 buf[1] = (val>> 8) & 0xff;
223 buf[2] = (val>>16) & 0xff;
224 buf[3] = (val>>24) & 0xff;
225 return buf;
226}
227
228
229unsigned char* TTSCarbon::u16tobuf(unsigned char* buf, uint16_t val)
230{
231 buf[0] = val & 0xff;
232 buf[1] = (val>> 8) & 0xff;
233 return buf;
234}
235
236
237/** @brief convert 80 bit extended ("long double") to int.
238 * This is simplified to handle the usual audio sample rates. Everything else
239 * might break. If the value isn't supported it will return 0.
240 * Conversion taken from Rockbox aiff codec.
241 */
242unsigned int TTSCarbon::extended2int(unsigned char* buf)
243{
244 unsigned int result = 0;
245 /* value negative? */
246 if(buf[0] & 0x80)
247 return 0;
248 /* check exponent. Int can handle up to 2^31. */
249 int exponent = buf[0] << 8 | buf[1];
250 if(exponent < 0x4000 || exponent > (0x4000 + 30))
251 return 0;
252 result = ((buf[2]<<24) | (buf[3]<<16) | (buf[4]<<8) | buf[5]) + 1;
253 result >>= (16 + 14 - buf[1]);
254 return result;
255}
256
257
258/** @brief Convert aiff file to wav. Returns 0 on success.
259 */
260int TTSCarbon::convertAiffToWav(const char* aiff, const char* wav)
261{
262 struct commchunk {
263 unsigned long chunksize;
264 unsigned short channels;
265 unsigned long frames;
266 unsigned short size;
267 int rate;
268 };
269
270 struct ssndchunk {
271 unsigned long chunksize;
272 unsigned long offset;
273 unsigned long blocksize;
274 };
275
276 FILE* in;
277 FILE* out;
278 unsigned char obuf[4];
279 unsigned char* buf;
280 /* minimum file size for a valid aiff file is 46 bytes:
281 * - FORM chunk: 12 bytes
282 * - COMM chunk: 18 bytes
283 * - SSND chunk: 16 bytes (with no actual data)
284 */
285 struct stat filestat;
286 stat(aiff, &filestat);
287 if(filestat.st_size < 46)
288 return -1;
289 /* read input file into memory */
290 buf = (unsigned char*)malloc(filestat.st_size * sizeof(unsigned char));
291 if(!buf) /* error out if malloc() failed */
292 return -1;
293 in = fopen(aiff, "rb");
294 if(fread(buf, 1, filestat.st_size, in) < filestat.st_size) {
295 printf("could not read file: not enought bytes read\n");
296 fclose(in);
297 return -1;
298 }
299 fclose(in);
300
301 /* check input file format */
302 if(memcmp(buf, "FORM", 4) | memcmp(&buf[8], "AIFF", 4)) {
303 printf("No valid AIFF header found.\n");
304 free(buf);
305 return -1;
306 }
307 /* read COMM chunk */
308 unsigned char* commstart = &buf[12];
309 struct commchunk comm;
310 if(memcmp(commstart, "COMM", 4)) {
311 printf("COMM chunk not at beginning.\n");
312 free(buf);
313 return -1;
314 }
315 comm.chunksize = be2u32(&commstart[4]);
316 comm.channels = be2u16(&commstart[8]);
317 comm.frames = be2u32(&commstart[10]);
318 comm.size = be2u16(&commstart[14]);
319 comm.rate = extended2int(&commstart[16]);
320
321 /* find SSND as next chunk */
322 unsigned char* ssndstart = commstart + 8 + comm.chunksize;
323 while(memcmp(ssndstart, "SSND", 4) && ssndstart < (buf + filestat.st_size)) {
324 printf("Skipping chunk.\n");
325 ssndstart += be2u32(&ssndstart[4]) + 8;
326 }
327 if(ssndstart > (buf + filestat.st_size)) {
328 free(buf);
329 return -1;
330 }
331
332 struct ssndchunk ssnd;
333 ssnd.chunksize = be2u32(&ssndstart[4]);
334 ssnd.offset = be2u32(&ssndstart[8]);
335 ssnd.blocksize = be2u32(&ssndstart[12]);
336
337 /* Calculate the total length of the resulting RIFF chunk.
338 * The length is given by frames * samples * bytes/sample.
339 * We need to add:
340 * - 16 bytes: fmt chunk header
341 * - 8 bytes: data chunk header
342 * - 4 bytes: wave chunk identifier
343 */
344 out = fopen(wav, "wb+");
345
346 /* write the wav header */
347 unsigned short blocksize = comm.channels * (comm.size >> 3);
348 unsigned long rifflen = blocksize * comm.frames + 28;
349 fwrite("RIFF", 1, 4, out);
350 fwrite(u32tobuf(obuf, rifflen), 1, 4, out);
351 fwrite("WAVE", 1, 4, out);
352
353 /* write the fmt chunk and chunk size (always 16) */
354 /* write fmt chunk header:
355 * header, size (always 0x10, format code (always 0x0001)
356 */
357 fwrite("fmt \x10\x00\x00\x00\x01\x00", 1, 10, out);
358 /* number of channels (2 bytes) */
359 fwrite(u16tobuf(obuf, comm.channels), 1, 2, out);
360 /* sampling rate (4 bytes) */
361 fwrite(u32tobuf(obuf, comm.rate), 1, 4, out);
362
363 /* data rate, i.e. bytes/sec */
364 fwrite(u32tobuf(obuf, comm.rate * blocksize), 1, 4, out);
365
366 /* data block size */
367 fwrite(u16tobuf(obuf, blocksize), 1, 2, out);
368
369 /* bits per sample */
370 fwrite(u16tobuf(obuf, comm.size), 1, 2, out);
371
372 /* write the data chunk */
373 /* chunk id */
374 fwrite("data", 1, 4, out);
375 /* chunk size: 4 bytes. */
376 unsigned long cs = blocksize * comm.frames;
377 fwrite(u32tobuf(obuf, cs), 1, 4, out);
378
379 /* write data */
380 unsigned char* data = ssndstart;
381 unsigned long pos = ssnd.chunksize;
382 /* byteswap if samples are 16 bit */
383 if(comm.size == 16) {
384 while(pos) {
385 obuf[1] = *data++ & 0xff;
386 obuf[0] = *data++ & 0xff;
387 fwrite(obuf, 1, 2, out);
388 pos -= 2;
389 }
390 }
391 /* 8 bit samples have need no conversion so we can bulk copy.
392 * Everything that is not 16 bit is considered 8. */
393 else {
394 fwrite(data, 1, pos, out);
395 }
396 /* number of bytes has to be even, even if chunksize is not. */
397 if(cs % 2) {
398 fwrite(obuf, 1, 1, out);
399 }
400
401 fclose(out);
402 free(buf);
403 return 0;
404}
405
diff --git a/rbutil/rbutilqt/base/ttscarbon.h b/rbutil/rbutilqt/base/ttscarbon.h
new file mode 100644
index 0000000000..c6b4a61320
--- /dev/null
+++ b/rbutil/rbutilqt/base/ttscarbon.h
@@ -0,0 +1,72 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 *
9 * Copyright (C) 2010 by Dominik Riebeling
10 * $Id$
11 *
12 * All files in this archive are subject to the GNU General Public License.
13 * See the file COPYING in the source tree root for full license agreement.
14 *
15 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 * KIND, either express or implied.
17 *
18 ****************************************************************************/
19
20#ifndef TTSCARBON_H
21#define TTSCARBON_H
22
23#include <QtCore>
24#include "ttsbase.h"
25
26#include <Carbon/Carbon.h>
27#include <inttypes.h>
28
29class TTSCarbon : public TTSBase
30{
31 Q_OBJECT
32 //! Enum to identify the settings
33 enum ConfigValuesCarbon
34 {
35 ConfigVoice
36 };
37 public:
38 TTSCarbon(QObject *parent = NULL);
39
40 //! Child class should generate a clip
41 TTSStatus voice(QString text,QString wavfile, QString* errStr);
42 //! Child class should do startup
43 bool start(QString *errStr);
44 //! child class should stop
45 bool stop() ;
46
47 // configuration
48 //! Child class should return true, when configuration is good
49 bool configOk();
50 //! Child class should generate and insertSetting(..) its settings
51 void generateSettings();
52 //! Chlid class should commit the Settings to permanent storage
53 void saveSettings();
54
55 private:
56 SpeechChannel m_channel;
57 CFStringBuiltInEncodings m_voiceScript;
58
59 unsigned long be2u32(unsigned char* buf);
60 unsigned long be2u16(unsigned char* buf);
61 unsigned char* u32tobuf(unsigned char* buf, uint32_t val);
62 unsigned char* u16tobuf(unsigned char* buf, uint16_t val);
63 unsigned int extended2int(unsigned char* buf);
64 int convertAiffToWav(const char* aiff, const char* wav);
65
66
67 protected:
68 // static QMap<QString,QString> ttsList;
69};
70
71#endif // TTSCARBON_H
72
diff --git a/rbutil/rbutilqt/rbutilqt.pro b/rbutil/rbutilqt/rbutilqt.pro
index 95c00c04a3..f8321182e3 100644
--- a/rbutil/rbutilqt/rbutilqt.pro
+++ b/rbutil/rbutilqt/rbutilqt.pro
@@ -272,11 +272,13 @@ unix:static {
272} 272}
273 273
274macx { 274macx {
275 SOURCES += base/ttscarbon.cpp
276 HEADERS += base/ttscarbon.h
275 QMAKE_MAC_SDK=/Developer/SDKs/MacOSX10.4u.sdk 277 QMAKE_MAC_SDK=/Developer/SDKs/MacOSX10.4u.sdk
276 QMAKE_LFLAGS_PPC=-mmacosx-version-min=10.4 -arch ppc 278 QMAKE_LFLAGS_PPC=-mmacosx-version-min=10.4 -arch ppc
277 QMAKE_LFLAGS_X86=-mmacosx-version-min=10.4 -arch i386 279 QMAKE_LFLAGS_X86=-mmacosx-version-min=10.4 -arch i386
278 CONFIG+=x86 ppc 280 CONFIG+=x86 ppc
279 LIBS += -L/usr/local/lib -framework IOKit -framework CoreFoundation -lz 281 LIBS += -L/usr/local/lib -framework IOKit -framework CoreFoundation -framework Carbon -lz
280 INCLUDEPATH += /usr/local/include 282 INCLUDEPATH += /usr/local/include
281 QMAKE_INFO_PLIST = Info.plist 283 QMAKE_INFO_PLIST = Info.plist
282 RC_FILE = icons/rbutilqt.icns 284 RC_FILE = icons/rbutilqt.icns