44 #include "EST_string_aux.h"
45 #include "EST_FileType.h"
46 #include "EST_Token.h"
47 #include "ling_class/EST_Utterance.h"
48 #include "EST_UtteranceFile.h"
60 static EST_write_status utt_save_all_contents(ostream &outf,
63 static EST_write_status utt_save_all_contents(ostream &outf,
67 static EST_write_status utt_save_ling_content(ostream &outf,
77 if (node->unref_relation(
"__READ__"))
97 if ((r = read_est_header(ts, hinfo, ascii, t)) != format_ok)
99 if (t != est_file_utterance)
100 return misc_read_error;
101 if (hinfo.
ival(
"version") != 2)
103 if (hinfo.
ival(
"version") == 3)
104 EST_warning(
"Loading est utterance format version 3, ladders will not be understood");
107 EST_error(
"utt_load: %s wrong version of utterance format expected 2 (or 3) but found %d",
113 if (ts.
get() !=
"Features")
116 " missing utterance features section" << endl;
117 return misc_read_error;
122 if (ts.
get() !=
"Stream_Items")
125 " missing Items section" << endl;
126 return misc_read_error;
129 r = load_all_contents(ts, sitems, max_id);
132 if (ts.
peek() ==
"Streams")
134 cerr <<
"utt.load: streams found in utterance file, " <<
135 "no longer supported" << endl;
136 return misc_read_error;
140 if ((r == format_ok) && (ts.
get() !=
"Relations"))
143 " missing Relations section" << endl;
144 return misc_read_error;
147 r = load_relations(ts, u, sitems);
149 if ((r == format_ok) && (ts.
get() !=
"End_of_Utterance"))
152 " End_of_Utterance expected but not found" << endl;
153 return misc_read_error;
160 for(
int ni=0; ni < sitems.length(); ni++)
183 while (ts.
peek() !=
"End_of_Stream_Items")
191 Sid = ts.
get().string();
197 " Item name not a number: " << Sid << endl;
198 return misc_read_error;
200 if (
id >= sitems.
length())
206 if (si->
f.
load(ts) != format_ok)
207 return misc_read_error;
208 idval = si->
f.
I(
"id",0);
212 return misc_read_error;
228 while (ts.
peek() !=
"End_of_Relations")
234 if (r->
load(ts,sitems) != format_ok)
235 return misc_read_error;
241 return misc_read_error;
250 EST_write_status EST_UtteranceFile::save_est_ascii(ostream &outf,
const EST_Utterance &utt)
252 EST_write_status v = write_ok;
255 outf.setf(ios::fixed, ios::floatfield);
258 outf <<
"EST_File utterance\n";
259 outf <<
"DataType ascii\n";
260 outf <<
"version 2\n";
261 outf <<
"EST_Header_End\n";
268 outf <<
"Stream_Items\n";
270 v = utt_save_all_contents(outf,utt,sinames);
271 if (v == write_fail)
return v;
272 outf <<
"End_of_Stream_Items\n";
275 outf <<
"Relations\n";
279 v = relation(p->v)->save(outf,sinames);
280 if (v == write_fail)
return v;
282 outf <<
"End_of_Relations\n";
284 outf <<
"End_of_Utterance\n";
288 static EST_write_status utt_save_all_contents(ostream &outf,
297 EST_write_status v = write_ok;
303 v = utt_save_all_contents(outf,relation(p->v)->head(),
305 if (v == write_fail)
return v;
311 static EST_write_status utt_save_all_contents(ostream &outf,
320 utt_save_ling_content(outf,n,sinames,si_count);
323 utt_save_all_contents(outf,inext(n),sinames,si_count);
324 utt_save_all_contents(outf,idown(n),sinames,si_count);
329 static EST_write_status utt_save_ling_content(ostream &outf,
336 if ((si != 0) && (!sinames.
present(si->contents())))
338 sinames.
add_item(si->contents(),si_count);
339 outf << si_count <<
" ";
340 si->features().
save(outf);
352 EST_read_status status = read_ok;
358 status = rel->
load(
"", ts,
"esps");
373 EST_write_status EST_UtteranceFile::save_xlabel(ostream &outf,
376 EST_write_status status = write_error;
384 rel = ::relation(p->v);
391 if (iup(hd) || idown(hd))
398 return rel->
save(outf,
"esps", 0);
406 #if defined(INCLUDE_XML_FORMATS)
421 long pos=ftell(stream);
426 fgets(buf, 80, stream);
428 if (strncmp(buf,
"<?xml", 5) != 0)
429 return read_format_error;
431 fgets(buf, 80, stream);
433 if (strncmp(buf,
"<!DOCTYPE apml", 14) != 0)
434 return read_format_error;
437 fseek(stream, pos, 0);
439 EST_read_status stat = apml_read(stream, ts.
filename(),u, max_id);
442 fseek(stream, pos, 0);
459 long pos=ftell(stream);
464 fgets(buf, 80, stream);
466 if (strncmp(buf,
"<?xml", 5) != 0)
467 return read_format_error;
470 fseek(stream, pos, 0);
472 EST_read_status stat = EST_GenXML::read_xml(stream, ts.
filename(),u, max_id);
475 fseek(stream, pos, 0);
480 EST_write_status EST_UtteranceFile::save_genxml(ostream &outf,
483 EST_write_status status=write_ok;
495 for (fp.begin(hd->features()); fp; ++fp)
496 features.add_item(fp->k, 1);
501 outf <<
"<?xml version='1.0'?>\n";
503 outf <<
"<!DOCTYPE utterance PUBLIC '//CSTR EST//DTD cstrutt//EN' 'cstrutt.dtd'\n\t[\n";
507 outf <<
"\t<!ATTLIST item\n";
508 for (f.begin(features); f; ++f)
512 outf <<
"\t\t" << f->k <<
"\tCDATA #IMPLIED\n";
518 outf <<
"<utterance>\n";
519 outf <<
"<language name='unknown'/>\n";
530 if (iup(hd) || idown(hd))
538 outf <<
"<relation name='"<< rel->
name()<<
"' structure-type='list'>\n";
546 for (p.
begin(hd->features()); p; ++p)
547 if (p->k !=
"estContentFeature")
548 outf <<
" " << p->k <<
"='" << p->v <<
"'\n";
555 outf <<
"</relation>\n";
558 status=write_partial;
562 outf <<
"</utterance>\n";
569 EST_String EST_UtteranceFile::options_short(
void)
573 for(
int n=0; n< EST_UtteranceFile::map.n() ; n++)
575 EST_UtteranceFileType type = EST_UtteranceFile::map.nth_token(n);
576 if (type != uff_none)
578 for(
int ni=0; ni<NAMED_ENUM_MAX_SYNONYMS; ni++)
580 const char *nm = EST_UtteranceFile::map.name(type, ni);
594 EST_String EST_UtteranceFile::options_supported(
void)
596 EST_String s(
"Available utterance file formats:\n");
598 for(
int n=0; n< EST_UtteranceFile::map.n() ; n++)
600 EST_UtteranceFileType type = EST_UtteranceFile::map.nth_token(n);
601 if (type != uff_none)
603 const char *d = EST_UtteranceFile::map.info(type).description;
604 for(
int ni=0; ni<NAMED_ENUM_MAX_SYNONYMS; ni++)
606 const char *nm = EST_UtteranceFile::map.name(type, ni);
620 Start_TNamedEnumI_T(EST_UtteranceFileType, EST_UtteranceFile::Info, EST_UtteranceFile::map, utterancefile)
621 { uff_none, { NULL },
622 { FALSE, NULL, NULL,
"unknown utterance file type"} },
623 { uff_est, {
"est",
"est_ascii"},
624 { TRUE, EST_UtteranceFile::load_est_ascii, EST_UtteranceFile::save_est_ascii,
"Standard EST Utterance File" } },
625 #if defined(INCLUDE_XML_FORMATS)
626 { uff_apml, {
"apml",
"xml"},
627 { TRUE, EST_UtteranceFile::load_apml, NULL,
"Utterance in APML" } },
628 { uff_genxml, {
"genxml",
"xml"},
629 { TRUE, EST_UtteranceFile::load_genxml, EST_UtteranceFile::save_genxml,
"Utterance in XML, Any DTD" } },
631 { uff_xlabel, {
"xlabel"},
632 { TRUE, EST_UtteranceFile::load_xlabel, EST_UtteranceFile::save_xlabel,
"Xwaves Label File" } },
634 { FALSE, NULL, NULL,
"unknown utterance file type"} }
636 End_TNamedEnumI_T(EST_UtteranceFileType, EST_UtteranceFile::Info, EST_UtteranceFile::map, utterancefile)
638 Declare_TNamedEnumI(EST_UtteranceFileType, EST_UtteranceFile::Info)
640 #if defined(INSTANTIATE_TEMPLATES)
641 #include "../base_class/EST_TNamedEnum.cc"
642 Instantiate_TNamedEnumI(EST_UtteranceFileType, EST_UtteranceFile::Info)
647 #if defined(INSTANTIATE_TEMPLATES)
649 #include "../base_class/EST_TSimpleVector.cc"
650 #include "../base_class/EST_TVector.cc"
651 #include "../base_class/EST_Tvectlist.cc"
void set_val(const EST_String &name, const EST_Val &sval)
EST_read_status load(EST_TokenStream &ts)
load features from already opened EST_TokenStream
EST_write_status save(ostream &outf) const
save features in already opened ostream
const int I(const EST_String &path) const
EST_Features f
General features for this item.
void set(const EST_String &name, int ival)
const float F(const EST_String &name) const
int ival(const EST_String &rkey, int m=1) const
const EST_String & name() const
EST_read_status load(const EST_String &filename, const EST_String &type="esps")
EST_write_status save(const EST_String &filename, bool evaluate_ff=false) const
void set_utt(EST_Utterance *u)
static EST_String cat(const EST_String s1, const EST_String s2=Empty, const EST_String s3=Empty, const EST_String s4=Empty, const EST_String s5=Empty, const EST_String s6=Empty, const EST_String s7=Empty, const EST_String s8=Empty, const EST_String s9=Empty)
void begin(const Container &over)
Set the iterator ready to run over this container.
int add_item(const K &rkey, const V &rval, int no_search=0)
add key-val pair to list
const int present(const K &rkey) const
Returns true if key is present.
void resize(int n, int set=1)
INLINE int length() const
number of items in vector.
void set_SingleCharSymbols(const EST_String &sc)
set which characters are to be treated as single character symbols
const EST_String filename() const
The originating filename (if there is one)
const EST_String pos_description()
A string describing current position, suitable for error messages.
FILE * filedescriptor()
For the people who need the actual file descriptor (if possible)
void set_quotes(char q, char e)
set characters to be used as quotes and escape, and set quote mode
EST_Token & peek(void)
peek at next token
EST_TokenStream & get(EST_Token &t)
get next token in stream
EST_Features f
Utterance level features.
EST_Features relations
The list of named relations.
void clear()
remove everything in utterance
EST_Relation * create_relation(const EST_String &relname)
create a new relation called relname.