--- festival-1.4.3.orig/examples/text2wave.sh +++ festival-1.4.3/examples/text2wave.sh @@ -50,7 +50,7 @@ Options -mode Explicit tts mode. -o ofile File to save waveform (default is stdout). - -otype Output waveform type: ulaw, snd, aiff, riff, nist etc. + -otype Output waveform type: alaw, ulaw, snd, aiff, riff, nist etc. (default is riff) -F Output frequency. -scale Volume factor --- festival-1.4.3.orig/doc/festival.texi +++ festival-1.4.3/doc/festival.texi @@ -4190,8 +4194,8 @@ This will save the waveform in the named file in the format specified in the @code{Parameter} @code{Wavefiletype}. All formats supported by the Edinburgh Speech Tools are valid including @code{nist}, @code{esps}, -@code{sun}, @code{riff}, @code{aiff}, @code{raw} and @code{ulaw}. Note -the functions @code{utt.wave.rescale} and @code{utt.wave.resample} may +@code{sun}, @code{riff}, @code{aiff}, @code{raw}, @code{alaw} and @code{ulaw}. +Note the functions @code{utt.wave.rescale} and @code{utt.wave.resample} may be used to change the gain and sample frequency of the waveform before saving it. A waveform may be imported into an existing utterance with the function @code{utt.import.wave}. This is specifically designed to @@ -5778,7 +5782,7 @@ they include a @file{/dev/dsp} (Soundblaster PRO for example). This was not dealt with properly in earlier versions of the system but now the support automatically checks to see the sample width supported and uses -it accordingly. 8 bit at higher frequencies that 8K sounds better than +it accordingly. 8 bit at higher frequencies than 8K sounds better than straight 8k ulaw so this feature is useful. @cindex Windows NT audio @@ -7991,7 +7995,7 @@ server. @item --otype OUTPUTTYPE If an output waveform file is to be used this specified the output type -of the file. The default is @code{nist}, but, @code{ulaw}, +of the file. 
The default is @code{nist}, but @code{alaw}, @code{riff}, @code{ulaw} and others as supported by the Edinburgh Speech Tools Library are valid. You may use raw too but note that Festival may return waveforms of various sampling rates depending on the --- festival-1.4.3.orig/src/modules/diphone/diphone.cc +++ festival-1.4.3/src/modules/diphone/diphone.cc @@ -180,6 +182,8 @@ db->group_encoding = di_raw; else if (streq(db->group_encoding_str,"ulaw")) db->group_encoding = di_ulaw; + else if (streq(db->group_encoding_str,"alaw")) + db->group_encoding = di_alaw; else { cerr << "Diphone: unknown group encoding" << endl; @@ -218,7 +222,7 @@ { wfree(db->indx[0]->diph); // ptr to the whole diphname table wfree(db->allsignal); - wfree(db->allulawsignal); + wfree(db->allualawsignal); wfree(db->allframes); } for (i=0; i < db->nindex; i++) @@ -275,7 +279,7 @@ db->alternates_before = NIL; db->alternates_after = NIL; db->allsignal = 0; - db->allulawsignal = 0; + db->allualawsignal = 0; db->offsets = 0; db->gfd = 0; db->default_diphone = 0; --- festival-1.4.3.orig/src/modules/diphone/di_io.cc +++ festival-1.4.3/src/modules/diphone/di_io.cc @@ -415,6 +415,16 @@ ulaw_to_short(ulaw,db->vox[i]->signal,db->vox[i]->nsamples); wfree(ulaw); } + else if (db->group_encoding == di_alaw) + { + unsigned char *alaw = + walloc(unsigned char,db->vox[i]->nsamples); + db->vox[i]->signal = walloc(short,db->vox[i]->nsamples); + fseek(db->gfd,db->gsignalbase+(db->offsets[i]),SEEK_SET); + fread(alaw,sizeof(unsigned char),db->vox[i]->nsamples,db->gfd); + alaw_to_short(alaw,db->vox[i]->signal,db->vox[i]->nsamples); + wfree(alaw); + } else { cerr << "Diphone: unknown group type" << endl; @@ -800,8 +810,13 @@ } else if (db->group_encoding == di_ulaw) { - db->allulawsignal = walloc(unsigned char,totsamples); - fread(db->allulawsignal,sizeof(unsigned char),totsamples,db->gfd); + db->allualawsignal = walloc(unsigned char,totsamples); + fread(db->allualawsignal,sizeof(unsigned char),totsamples,db->gfd); + } + 
else if (db->group_encoding == di_alaw) + { + db->allualawsignal = walloc(unsigned char,totsamples); + fread(db->allualawsignal,sizeof(unsigned char),totsamples,db->gfd); } } else @@ -821,7 +836,13 @@ else if (db->group_encoding == di_ulaw) { db->vox[i]->signal = walloc(short,samp_counts[i]); - ulaw_to_short(&db->allulawsignal[sample_offset], + ulaw_to_short(&db->allualawsignal[sample_offset], + db->vox[i]->signal,samp_counts[i]); + } + else if (db->group_encoding == di_alaw) + { + db->vox[i]->signal = walloc(short,samp_counts[i]); + alaw_to_short(&db->allualawsignal[sample_offset], db->vox[i]->signal,samp_counts[i]); } else @@ -838,7 +859,7 @@ sample_offset += samp_counts[i]; } if (db->sig_access_type != di_direct) - if (db->group_encoding == di_ulaw) + if (db->group_encoding == di_ulaw || db->group_encoding == di_alaw) fseek(db->gfd,(long)sample_offset,SEEK_CUR); else fseek(db->gfd,(long)sample_offset*sizeof(short),SEEK_CUR); @@ -867,7 +888,7 @@ if (db->swap) swap_bytes_float(db->allframes,totframes*db->lpc_order); } - else if (db->group_encoding == di_ulaw) // its as shorts + else if (db->group_encoding == di_ulaw || db->group_encoding == di_alaw) // its as shorts { db->allframesshort = walloc(short,totframes*db->lpc_order); fread(db->allframesshort,sizeof(short), @@ -885,7 +906,7 @@ for (j=0;j<db->lpc[i]->nframes;j++) db->lpc[i]->f[j] = &db->allframes[(frame_offset+j)*db->lpc_order]; - else if (db->group_encoding == di_ulaw) + else if (db->group_encoding == di_ulaw || db->group_encoding == di_alaw) { int fixedpoint = FALSE; if (siod_get_lval("lpc_fixedpoint",NULL) != NIL) @@ -1031,6 +1052,13 @@ fwrite(ulaw,sizeof(unsigned char),db->vox[i]->nsamples,fd); wfree(ulaw); } + else if (db->group_encoding == di_alaw) + { + unsigned char *alaw = walloc(unsigned char,db->vox[i]->nsamples); + short_to_alaw(db->vox[i]->signal,alaw,db->vox[i]->nsamples); + fwrite(alaw,sizeof(unsigned char),db->vox[i]->nsamples,fd); + wfree(alaw); + } else { cerr << "Diphone: unknown group type 
for dumping" << endl; @@ -1058,7 +1086,7 @@ for (j=0; j<db->lpc[i]->nframes; j++) fwrite(db->lpc[i]->f[j],sizeof(float),db->lpc_order,fd); } - else if (db->group_encoding == di_ulaw) // saved as shorts + else if (db->group_encoding == di_ulaw || db->group_encoding == di_alaw) // saved as shorts { short *sh = new short[db->lpc_order]; --- festival-1.4.3.orig/src/modules/diphone/diphone.h +++ festival-1.4.3/src/modules/diphone/diphone.h @@ -41,7 +41,7 @@ enum di_sigaccess_t {di_direct, di_dynamic, di_ondemand}; enum di_db_type_t {di_pcm, di_lpc}; -enum di_group_encode_t {di_raw, di_ulaw }; +enum di_group_encode_t {di_raw, di_ulaw, di_alaw }; enum di_group_t {di_grouped, di_ungrouped}; typedef struct { @@ -109,7 +109,7 @@ int lpc_pitch_synch; /* True if lpc frames are pitch synchronous */ short *allsignal; /* used in group files */ - unsigned char *allulawsignal; + unsigned char *allualawsignal; float *allframes; short *allframesshort; --- festival-1.4.3.orig/src/modules/donovan/t2s.h +++ festival-1.4.3/src/modules/donovan/t2s.h @@ -261,10 +261,6 @@ /* transcribe.c */ void transcribe(CONFIG *config, LING_LIST *ling_list); -/* ulaw.c */ -unsigned char linear2ulaw(int sample); -int ulaw2linear(unsigned char ulawbyte); - /* utils.c */ char **split(char *in); void tidy_split(char **root); --- festival-1.4.3.orig/lib/init.scm +++ festival-1.4.3/lib/init.scm @@ -75,10 +74,6 @@ ;;; Set default audio method (cond - ((member 'nas *modules*) - (Parameter.def 'Audio_Method 'netaudio)) - ((member 'esd *modules*) - (Parameter.def 'Audio_Method 'esdaudio)) ((member 'sun16audio *modules*) (Parameter.def 'Audio_Method 'sun16audio)) ((member 'freebsd16audio *modules*) @@ -93,6 +88,10 @@ (Parameter.def 'Audio_Method 'os2audio)) ((member 'mplayeraudio *modules*) (Parameter.def 'Audio_Method 'mplayeraudio)) + ((member 'nas *modules*) + (Parameter.def 'Audio_Method 'netaudio)) + ((member 'esd *modules*) + (Parameter.def 'Audio_Method 'esdaudio)) (t ;; can't find direct support so guess that 
/dev/audio for 8k ulaw exists (Parameter.def 'Audio_Method 'sunaudio))) ;;; If you have an external program to play audio add its definition