diff options
Diffstat (limited to 'utils/rbutilqt/base/ttscarbon.cpp')
-rw-r--r-- | utils/rbutilqt/base/ttscarbon.cpp | 443 |
1 files changed, 443 insertions, 0 deletions
diff --git a/utils/rbutilqt/base/ttscarbon.cpp b/utils/rbutilqt/base/ttscarbon.cpp new file mode 100644 index 0000000000..2d9fa49dbe --- /dev/null +++ b/utils/rbutilqt/base/ttscarbon.cpp | |||
@@ -0,0 +1,443 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * | ||
9 | * Copyright (C) 2010 by Dominik Riebeling | ||
10 | * | ||
11 | * All files in this archive are subject to the GNU General Public License. | ||
12 | * See the file COPYING in the source tree root for full license agreement. | ||
13 | * | ||
14 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
15 | * KIND, either express or implied. | ||
16 | * | ||
17 | ****************************************************************************/ | ||
18 | |||
19 | #include <QtCore> | ||
20 | #include "ttsbase.h" | ||
21 | #include "ttscarbon.h" | ||
22 | #include "encttssettings.h" | ||
23 | #include "rbsettings.h" | ||
24 | |||
25 | #include <CoreFoundation/CoreFoundation.h> | ||
26 | #include <ApplicationServices/ApplicationServices.h> | ||
27 | #include <Carbon/Carbon.h> | ||
28 | #include <unistd.h> | ||
29 | #include <sys/stat.h> | ||
30 | #include <inttypes.h> | ||
31 | #include "Logger.h" | ||
32 | |||
33 | TTSCarbon::TTSCarbon(QObject* parent) : TTSBase(parent) | ||
34 | { | ||
35 | } | ||
36 | |||
37 | TTSBase::Capabilities TTSCarbon::capabilities() | ||
38 | { | ||
39 | return TTSBase::CanSpeak; | ||
40 | } | ||
41 | |||
42 | bool TTSCarbon::configOk() | ||
43 | { | ||
44 | return true; | ||
45 | } | ||
46 | |||
47 | |||
48 | bool TTSCarbon::start(QString *errStr) | ||
49 | { | ||
50 | (void)errStr; | ||
51 | VoiceSpec vspec; | ||
52 | VoiceSpec* vspecref = NULL; | ||
53 | VoiceDescription vdesc; | ||
54 | OSErr error; | ||
55 | QString selectedVoice | ||
56 | = RbSettings::subValue("carbon", RbSettings::TtsVoice).toString(); | ||
57 | SInt16 numVoices; | ||
58 | SInt16 voiceIndex; | ||
59 | error = CountVoices(&numVoices); | ||
60 | for(voiceIndex = 1; voiceIndex < numVoices; ++voiceIndex) { | ||
61 | error = GetIndVoice(voiceIndex, &vspec); | ||
62 | error = GetVoiceDescription(&vspec, &vdesc, sizeof(vdesc)); | ||
63 | // name is pascal string, i.e. the first byte is the length. | ||
64 | QString name = QString::fromLocal8Bit((const char*)&vdesc.name[1], | ||
65 | vdesc.name[0]); | ||
66 | if(name == selectedVoice) { | ||
67 | vspecref = &vspec; | ||
68 | if(vdesc.script != -1) | ||
69 | m_voiceScript = (CFStringBuiltInEncodings)vdesc.script; | ||
70 | else | ||
71 | m_voiceScript = (CFStringBuiltInEncodings)vdesc.reserved[0]; | ||
72 | break; | ||
73 | } | ||
74 | } | ||
75 | if(voiceIndex == numVoices) { | ||
76 | // voice not found. Add user notification here and proceed with | ||
77 | // system default voice. | ||
78 | LOG_WARNING() << "Selected voice not found, using system default!"; | ||
79 | GetVoiceDescription(&vspec, &vdesc, sizeof(vdesc)); | ||
80 | if(vdesc.script != -1) | ||
81 | m_voiceScript = (CFStringBuiltInEncodings)vdesc.script; | ||
82 | else | ||
83 | m_voiceScript = (CFStringBuiltInEncodings)vdesc.reserved[0]; | ||
84 | } | ||
85 | |||
86 | error = NewSpeechChannel(vspecref, &m_channel); | ||
87 | //SetSpeechInfo(channel, soSpeechDoneCallBack, speechDone); | ||
88 | Fixed rate = (Fixed)(0x10000 * RbSettings::subValue("carbon", | ||
89 | RbSettings::TtsSpeed).toInt()); | ||
90 | if(rate != 0) | ||
91 | SetSpeechRate(m_channel, rate); | ||
92 | |||
93 | Fixed pitch = (Fixed)(0x10000 * RbSettings::subValue("carbon", | ||
94 | RbSettings::TtsPitch).toInt()); | ||
95 | if(pitch != 0) | ||
96 | SetSpeechPitch(m_channel, pitch); | ||
97 | |||
98 | return (error == 0) ? true : false; | ||
99 | } | ||
100 | |||
101 | |||
102 | bool TTSCarbon::stop(void) | ||
103 | { | ||
104 | DisposeSpeechChannel(m_channel); | ||
105 | return true; | ||
106 | } | ||
107 | |||
108 | |||
109 | void TTSCarbon::generateSettings(void) | ||
110 | { | ||
111 | QStringList voiceNames; | ||
112 | QString systemVoice; | ||
113 | SInt16 numVoices; | ||
114 | OSErr error; | ||
115 | VoiceSpec vspec; | ||
116 | VoiceDescription vdesc; | ||
117 | |||
118 | // get system voice | ||
119 | error = GetVoiceDescription(NULL, &vdesc, sizeof(vdesc)); | ||
120 | systemVoice | ||
121 | = QString::fromLocal8Bit((const char*)&vdesc.name[1], vdesc.name[0]); | ||
122 | // get list of all voices | ||
123 | CountVoices(&numVoices); | ||
124 | for(SInt16 i = 1; i < numVoices; ++i) { | ||
125 | error = GetIndVoice(i, &vspec); | ||
126 | error = GetVoiceDescription(&vspec, &vdesc, sizeof(vdesc)); | ||
127 | // name is pascal string, i.e. the first byte is the length. | ||
128 | QString name | ||
129 | = QString::fromLocal8Bit((const char*)&vdesc.name[1], vdesc.name[0]); | ||
130 | voiceNames.append(name.trimmed()); | ||
131 | } | ||
132 | // voice | ||
133 | EncTtsSetting* setting; | ||
134 | QString voice | ||
135 | = RbSettings::subValue("carbon", RbSettings::TtsVoice).toString(); | ||
136 | if(voice.isEmpty()) | ||
137 | voice = systemVoice; | ||
138 | setting = new EncTtsSetting(this, EncTtsSetting::eSTRINGLIST, | ||
139 | tr("Voice:"), voice, voiceNames, EncTtsSetting::eNOBTN); | ||
140 | insertSetting(ConfigVoice, setting); | ||
141 | |||
142 | // speed | ||
143 | int speed = RbSettings::subValue("carbon", RbSettings::TtsSpeed).toInt(); | ||
144 | setting = new EncTtsSetting(this, EncTtsSetting::eINT, | ||
145 | tr("Speed (words/min):"), speed, 80, 500, | ||
146 | EncTtsSetting::eNOBTN); | ||
147 | insertSetting(ConfigSpeed, setting); | ||
148 | |||
149 | // pitch | ||
150 | int pitch = RbSettings::subValue("carbon", RbSettings::TtsPitch).toInt(); | ||
151 | setting = new EncTtsSetting(this, EncTtsSetting::eINT, | ||
152 | tr("Pitch (0 for default):"), pitch, 0, 65, | ||
153 | EncTtsSetting::eNOBTN); | ||
154 | insertSetting(ConfigPitch, setting); | ||
155 | |||
156 | } | ||
157 | |||
158 | |||
159 | void TTSCarbon::saveSettings(void) | ||
160 | { | ||
161 | // save settings in user config | ||
162 | RbSettings::setSubValue("carbon", RbSettings::TtsVoice, | ||
163 | getSetting(ConfigVoice)->current().toString()); | ||
164 | RbSettings::setSubValue("carbon", RbSettings::TtsSpeed, | ||
165 | getSetting(ConfigSpeed)->current().toInt()); | ||
166 | RbSettings::setSubValue("carbon", RbSettings::TtsPitch, | ||
167 | getSetting(ConfigPitch)->current().toInt()); | ||
168 | RbSettings::sync(); | ||
169 | } | ||
170 | |||
171 | |||
172 | /** @brief create wav file from text using the selected TTS voice. | ||
173 | */ | ||
174 | TTSStatus TTSCarbon::voice(QString text, QString wavfile, QString* errStr) | ||
175 | { | ||
176 | TTSStatus status = NoError; | ||
177 | OSErr error; | ||
178 | |||
179 | char* tmpfile = NULL; | ||
180 | if(!wavfile.isEmpty()) { | ||
181 | QString aifffile = wavfile + ".aiff"; | ||
182 | // FIXME: find out why we need to do this. | ||
183 | // Create a local copy of the temporary file filename. | ||
184 | // Not doing so causes weird issues (path contains trailing spaces) | ||
185 | unsigned int len = aifffile.size() + 1; | ||
186 | tmpfile = (char*)malloc(len * sizeof(char)); | ||
187 | strncpy(tmpfile, aifffile.toLocal8Bit().constData(), len); | ||
188 | CFStringRef tmpfileref = CFStringCreateWithCString(kCFAllocatorDefault, | ||
189 | tmpfile, kCFStringEncodingUTF8); | ||
190 | CFURLRef urlref = CFURLCreateWithFileSystemPath(kCFAllocatorDefault, | ||
191 | tmpfileref, kCFURLPOSIXPathStyle, false); | ||
192 | SetSpeechInfo(m_channel, soOutputToFileWithCFURL, urlref); | ||
193 | } | ||
194 | |||
195 | // speak it. | ||
196 | // Convert the string to the encoding requested by the voice. Do this | ||
197 | // via CFString, as this allows to directly use the destination encoding | ||
198 | // as CFString uses the same values as the voice. | ||
199 | |||
200 | // allocate enough space to allow storing the string in a 2 byte encoding | ||
201 | unsigned int textlen = 2 * text.length() + 1; | ||
202 | char* textbuf = (char*)calloc(textlen, sizeof(char)); | ||
203 | char* utf8data = (char*)text.toUtf8().constData(); | ||
204 | int utf8bytes = text.toUtf8().size(); | ||
205 | CFStringRef cfstring = CFStringCreateWithBytes(kCFAllocatorDefault, | ||
206 | (UInt8*)utf8data, utf8bytes, | ||
207 | kCFStringEncodingUTF8, (Boolean)false); | ||
208 | CFIndex usedBuf = 0; | ||
209 | CFRange range; | ||
210 | range.location = 0; // character in string to start. | ||
211 | range.length = text.length(); // number of _characters_ in string | ||
212 | // FIXME: check if converting between encodings was lossless. | ||
213 | CFStringGetBytes(cfstring, range, m_voiceScript, ' ', | ||
214 | false, (UInt8*)textbuf, textlen, &usedBuf); | ||
215 | |||
216 | error = SpeakText(m_channel, textbuf, (unsigned long)usedBuf); | ||
217 | while(SpeechBusy()) { | ||
218 | // FIXME: add small delay here to make calls less frequent | ||
219 | QCoreApplication::processEvents(); | ||
220 | } | ||
221 | if(error != 0) { | ||
222 | *errStr = tr("Could not voice string"); | ||
223 | status = FatalError; | ||
224 | } | ||
225 | free(textbuf); | ||
226 | CFRelease(cfstring); | ||
227 | |||
228 | if(!wavfile.isEmpty()) { | ||
229 | // convert the temporary aiff file to wav | ||
230 | if(status == NoError | ||
231 | && convertAiffToWav(tmpfile, wavfile.toLocal8Bit().constData()) != 0) { | ||
232 | *errStr = tr("Could not convert intermediate file"); | ||
233 | status = FatalError; | ||
234 | } | ||
235 | // remove temporary aiff file | ||
236 | unlink(tmpfile); | ||
237 | free(tmpfile); | ||
238 | } | ||
239 | |||
240 | return status; | ||
241 | } | ||
242 | |||
243 | |||
244 | unsigned long TTSCarbon::be2u32(unsigned char* buf) | ||
245 | { | ||
246 | return (buf[0]&0xff)<<24 | (buf[1]&0xff)<<16 | (buf[2]&0xff)<<8 | (buf[3]&0xff); | ||
247 | } | ||
248 | |||
249 | |||
250 | unsigned long TTSCarbon::be2u16(unsigned char* buf) | ||
251 | { | ||
252 | return (buf[1]&0xff) | (buf[0]&0xff)<<8; | ||
253 | } | ||
254 | |||
255 | |||
256 | unsigned char* TTSCarbon::u32tobuf(unsigned char* buf, uint32_t val) | ||
257 | { | ||
258 | buf[0] = val & 0xff; | ||
259 | buf[1] = (val>> 8) & 0xff; | ||
260 | buf[2] = (val>>16) & 0xff; | ||
261 | buf[3] = (val>>24) & 0xff; | ||
262 | return buf; | ||
263 | } | ||
264 | |||
265 | |||
266 | unsigned char* TTSCarbon::u16tobuf(unsigned char* buf, uint16_t val) | ||
267 | { | ||
268 | buf[0] = val & 0xff; | ||
269 | buf[1] = (val>> 8) & 0xff; | ||
270 | return buf; | ||
271 | } | ||
272 | |||
273 | |||
274 | /** @brief convert 80 bit extended ("long double") to int. | ||
275 | * This is simplified to handle the usual audio sample rates. Everything else | ||
276 | * might break. If the value isn't supported it will return 0. | ||
277 | * Conversion taken from Rockbox aiff codec. | ||
278 | */ | ||
279 | unsigned int TTSCarbon::extended2int(unsigned char* buf) | ||
280 | { | ||
281 | unsigned int result = 0; | ||
282 | /* value negative? */ | ||
283 | if(buf[0] & 0x80) | ||
284 | return 0; | ||
285 | /* check exponent. Int can handle up to 2^31. */ | ||
286 | int exponent = buf[0] << 8 | buf[1]; | ||
287 | if(exponent < 0x4000 || exponent > (0x4000 + 30)) | ||
288 | return 0; | ||
289 | result = ((buf[2]<<24) | (buf[3]<<16) | (buf[4]<<8) | buf[5]) + 1; | ||
290 | result >>= (16 + 14 - buf[1]); | ||
291 | return result; | ||
292 | } | ||
293 | |||
294 | |||
295 | /** @brief Convert aiff file to wav. Returns 0 on success. | ||
296 | */ | ||
297 | int TTSCarbon::convertAiffToWav(const char* aiff, const char* wav) | ||
298 | { | ||
299 | struct commchunk { | ||
300 | unsigned long chunksize; | ||
301 | unsigned short channels; | ||
302 | unsigned long frames; | ||
303 | unsigned short size; | ||
304 | int rate; | ||
305 | }; | ||
306 | |||
307 | struct ssndchunk { | ||
308 | unsigned long chunksize; | ||
309 | unsigned long offset; | ||
310 | unsigned long blocksize; | ||
311 | }; | ||
312 | |||
313 | FILE* in; | ||
314 | FILE* out; | ||
315 | unsigned char obuf[4]; | ||
316 | unsigned char* buf; | ||
317 | /* minimum file size for a valid aiff file is 46 bytes: | ||
318 | * - FORM chunk: 12 bytes | ||
319 | * - COMM chunk: 18 bytes | ||
320 | * - SSND chunk: 16 bytes (with no actual data) | ||
321 | */ | ||
322 | struct stat filestat; | ||
323 | stat(aiff, &filestat); | ||
324 | if(filestat.st_size < 46) | ||
325 | return -1; | ||
326 | /* read input file into memory */ | ||
327 | buf = (unsigned char*)malloc(filestat.st_size * sizeof(unsigned char)); | ||
328 | if(!buf) /* error out if malloc() failed */ | ||
329 | return -1; | ||
330 | in = fopen(aiff, "rb"); | ||
331 | if(fread(buf, 1, filestat.st_size, in) < filestat.st_size) { | ||
332 | printf("could not read file: not enought bytes read\n"); | ||
333 | fclose(in); | ||
334 | free(buf); | ||
335 | return -1; | ||
336 | } | ||
337 | fclose(in); | ||
338 | |||
339 | /* check input file format */ | ||
340 | if(memcmp(buf, "FORM", 4) | memcmp(&buf[8], "AIFF", 4)) { | ||
341 | printf("No valid AIFF header found.\n"); | ||
342 | free(buf); | ||
343 | return -1; | ||
344 | } | ||
345 | /* read COMM chunk */ | ||
346 | unsigned char* commstart = &buf[12]; | ||
347 | struct commchunk comm; | ||
348 | if(memcmp(commstart, "COMM", 4)) { | ||
349 | printf("COMM chunk not at beginning.\n"); | ||
350 | free(buf); | ||
351 | return -1; | ||
352 | } | ||
353 | comm.chunksize = be2u32(&commstart[4]); | ||
354 | comm.channels = be2u16(&commstart[8]); | ||
355 | comm.frames = be2u32(&commstart[10]); | ||
356 | comm.size = be2u16(&commstart[14]); | ||
357 | comm.rate = extended2int(&commstart[16]); | ||
358 | |||
359 | /* find SSND as next chunk */ | ||
360 | unsigned char* ssndstart = commstart + 8 + comm.chunksize; | ||
361 | while(memcmp(ssndstart, "SSND", 4) && ssndstart < (buf + filestat.st_size)) { | ||
362 | printf("Skipping chunk.\n"); | ||
363 | ssndstart += be2u32(&ssndstart[4]) + 8; | ||
364 | } | ||
365 | if(ssndstart > (buf + filestat.st_size)) { | ||
366 | free(buf); | ||
367 | return -1; | ||
368 | } | ||
369 | |||
370 | struct ssndchunk ssnd; | ||
371 | ssnd.chunksize = be2u32(&ssndstart[4]); | ||
372 | ssnd.offset = be2u32(&ssndstart[8]); | ||
373 | ssnd.blocksize = be2u32(&ssndstart[12]); | ||
374 | |||
375 | /* Calculate the total length of the resulting RIFF chunk. | ||
376 | * The length is given by frames * samples * bytes/sample. | ||
377 | * We need to add: | ||
378 | * - 16 bytes: fmt chunk header | ||
379 | * - 8 bytes: data chunk header | ||
380 | * - 4 bytes: wave chunk identifier | ||
381 | */ | ||
382 | out = fopen(wav, "wb+"); | ||
383 | |||
384 | /* write the wav header */ | ||
385 | unsigned short blocksize = comm.channels * (comm.size >> 3); | ||
386 | unsigned long rifflen = blocksize * comm.frames + 28; | ||
387 | fwrite("RIFF", 1, 4, out); | ||
388 | fwrite(u32tobuf(obuf, rifflen), 1, 4, out); | ||
389 | fwrite("WAVE", 1, 4, out); | ||
390 | |||
391 | /* write the fmt chunk and chunk size (always 16) */ | ||
392 | /* write fmt chunk header: | ||
393 | * header, size (always 0x10, format code (always 0x0001) | ||
394 | */ | ||
395 | fwrite("fmt \x10\x00\x00\x00\x01\x00", 1, 10, out); | ||
396 | /* number of channels (2 bytes) */ | ||
397 | fwrite(u16tobuf(obuf, comm.channels), 1, 2, out); | ||
398 | /* sampling rate (4 bytes) */ | ||
399 | fwrite(u32tobuf(obuf, comm.rate), 1, 4, out); | ||
400 | |||
401 | /* data rate, i.e. bytes/sec */ | ||
402 | fwrite(u32tobuf(obuf, comm.rate * blocksize), 1, 4, out); | ||
403 | |||
404 | /* data block size */ | ||
405 | fwrite(u16tobuf(obuf, blocksize), 1, 2, out); | ||
406 | |||
407 | /* bits per sample */ | ||
408 | fwrite(u16tobuf(obuf, comm.size), 1, 2, out); | ||
409 | |||
410 | /* write the data chunk */ | ||
411 | /* chunk id */ | ||
412 | fwrite("data", 1, 4, out); | ||
413 | /* chunk size: 4 bytes. */ | ||
414 | unsigned long cs = blocksize * comm.frames; | ||
415 | fwrite(u32tobuf(obuf, cs), 1, 4, out); | ||
416 | |||
417 | /* write data */ | ||
418 | unsigned char* data = ssndstart; | ||
419 | unsigned long pos = ssnd.chunksize; | ||
420 | /* byteswap if samples are 16 bit */ | ||
421 | if(comm.size == 16) { | ||
422 | while(pos) { | ||
423 | obuf[1] = *data++ & 0xff; | ||
424 | obuf[0] = *data++ & 0xff; | ||
425 | fwrite(obuf, 1, 2, out); | ||
426 | pos -= 2; | ||
427 | } | ||
428 | } | ||
429 | /* 8 bit samples have need no conversion so we can bulk copy. | ||
430 | * Everything that is not 16 bit is considered 8. */ | ||
431 | else { | ||
432 | fwrite(data, 1, pos, out); | ||
433 | } | ||
434 | /* number of bytes has to be even, even if chunksize is not. */ | ||
435 | if(cs % 2) { | ||
436 | fwrite(obuf, 1, 1, out); | ||
437 | } | ||
438 | |||
439 | fclose(out); | ||
440 | free(buf); | ||
441 | return 0; | ||
442 | } | ||
443 | |||