001 /*
002 * To change this template, choose Tools | Templates
003 * and open the template in the editor.
004 */
005
006 package org.util.xml.parse;
007
008 import java.io.PrintWriter;
009 import java.io.StringWriter;
010 import java.io.IOException;
011 import org.util.xml.parse.policy.ParserPolicy;
012 import org.util.xml.element.Attribute;
013 import org.util.xml.element.TagElement;
014 import org.util.xml.element.TextElement;
015
016 /**
017 *
018 * @author masaru
019 */
020 public class ElementPartParser extends ParseElement {
021
022 // parsers
023 protected NameParser name_parser_;
024 protected SpaceParser space_parser_;
025 protected AttributeParser attribute_parser_;
026 protected TextElementParser text_element_parser_;
027 protected CommentInnerTagElementParser comment_inner_tag_element_parser_;
028
029 protected ParserPolicy policy_;
030
031 protected boolean is_novalue_occuered_;
032 protected boolean is_error_;
033
034 protected TagElement tag_element_;
035 protected TextElement text_element_;
036 protected String endtag_name_;
037 protected boolean is_start_tag_;
038 protected StringBuffer error_text_;
039
040 public ElementPartParser() {
041 this(null);
042 }
043
044 public ElementPartParser(ParserPolicy policy) {
045 policy_ = policy;
046
047 name_parser_ = new NameParser();
048 space_parser_ = new SpaceParser();
049 attribute_parser_ = new AttributeParser();
050 text_element_parser_ = new TextElementParser();
051 comment_inner_tag_element_parser_ = new CommentInnerTagElementParser();
052 error_text_ = new StringBuffer();
053 }
054
055 @Override
056 public boolean match(char c) {
057 return c=='<';
058 }
059
060 @Override
061 public int parse(int c, ElementParser parser) throws XMLParseException, IOException {
062
063 is_error_ = false;
064 is_start_tag_ = false;
065 endtag_name_ = null;
066 text_element_ = null;
067 tag_element_ = null;
068 is_novalue_occuered_ = false;
069
070 int next_word_ = -1;
071 int state = 0;
072 try{
073
074 while(true) {
075
076 //System.out.println("state:"+state+"|"+(char)c);
077 if(state == 0) {
078 if(c=='<') state = 2;
079 else if(isSpace(c)) ;
080 else if(text_element_parser_.match((char)c)){
081 c = text_element_parser_.parse(c, parser);
082 text_element_ = new TextElement(text_element_parser_.getReturnValue());
083 break;
084 } else escape(parser, "parse error: cannot read tag: state=0 ???");
085 }else if(state == 1) {
086 if(c=='<') state = 2;
087 else throw new XMLParseException("parse error: cannot read tag: state=1 ???");
088 }else if(state == 2) {
089 if((c=='/')) state = 6;
090 else if(c=='?') {
091 state = 9;
092 } else if(comment_inner_tag_element_parser_.match((char)c)) {
093 c = comment_inner_tag_element_parser_.parse(c, parser);
094 text_element_ = comment_inner_tag_element_parser_.getResult();
095 break;
096 } else if(name_parser_.match((char)c)) {
097 c = name_parser_.parse(c, parser);
098 String key = name_parser_.getReturnValue();
099 tag_element_ = new TagElement(key);
100 if(policy_ != null && policy_.forceEmptyTag(key))
101 tag_element_.setEmpty(true);
102 else
103 is_start_tag_ = true;
104 state = 10;
105 continue;
106 } else throw new XMLParseException("parse error: cannot read tag: this charactar is not allowed at start of tag ("+(char)c+")");
107 }else if(state == 3) {
108 next_word_ = c;
109 break;
110 } else if(state == 4) {
111 tag_element_.setEmpty(true);
112 is_start_tag_ = false;
113 if(c=='>') break;
114 else throw new XMLParseException("parse error: cannot read tag: [<.../"+(char)c+"]");
115 }else if(state == 6) {
116 c = name_parser_.parse(c, parser);
117 endtag_name_ = name_parser_.getReturnValue();
118 state = 7;
119 continue;
120 }else if(state == 7) {
121 if(c=='>') {
122 break;
123 } else if(space_parser_.match((char)c)) {
124 c = space_parser_.parse(c, parser);
125 continue;
126 } else throw new XMLParseException("parse error: cannot read tag: state=7 cannot find '>'");
127 }else if(state == 9) {
128 c = name_parser_.parse(c, parser);
129 tag_element_ = new TagElement(name_parser_.getReturnValue());
130 tag_element_.setPI(true);
131 state = 10;
132 continue;
133 }else if(state == 10) {
134 if(space_parser_.match((char)c)) {
135 c = space_parser_.parse(c, parser);
136 state = 11;
137 continue;
138 } else {
139 if(tag_element_.isPI()){
140 if(c=='?') state = 4;
141 else return escape(parser,"in <? ... ?> tag");
142 } else {
143 if(c=='>') break;
144 else if(c=='/') state = 4;
145 else if(is_novalue_occuered_) {
146 c = attribute_parser_.parse(c, parser);
147 Attribute attribute = attribute_parser_.getAttribute();
148 is_novalue_occuered_ = attribute.isNovalue();
149 tag_element_.addAttribute(attribute);
150 state = 10;
151 continue;
152 } else return escape(parser,"cannot read "+(char)c);
153 }
154 }
155 }else if(state == 11) {
156 if(attribute_parser_.match((char)c)) {
157 c = attribute_parser_.parse(c, parser);
158 Attribute attribute = attribute_parser_.getAttribute();
159 is_novalue_occuered_ = attribute.isNovalue();
160 tag_element_.addAttribute(attribute);
161 state = 10;
162 continue;
163 } else {
164 if(tag_element_.isPI()){
165 if(c=='?') state = 4;
166 else throw new XMLParseException("parse error: cannot read tag: state=11");
167 } else {
168 if(c=='>') break;
169 else if(c=='/') state = 4;
170 else throw new XMLParseException("parse error: cannot read tag: state=11");
171 }
172 }
173 }
174 if(state==0)
175 c = parser.get();
176 else
177 c = parser.getChar();
178 }
179
180 } catch(IOException e) {
181 is_error_ = true;
182 StringWriter sw = new StringWriter();
183 e.printStackTrace(new PrintWriter(sw));
184 if(policy_.throwExceptionIfDocumentHasError())
185 error_text_.append(sw.toString());
186 parser.escape(e.getMessage());
187 } catch(XMLParseException e) {
188 is_error_ = true;
189 StringWriter sw = new StringWriter();
190 e.printStackTrace(new PrintWriter(sw));
191 if(policy_.throwExceptionIfDocumentHasError())
192 error_text_.append(sw.toString());
193 parser.escape(e.getMessage());
194 }
195
196 int result = -1;
197 try {
198 if(text_element_!=null)
199 result = c;
200 else
201 result = parser.get();
202 }
203 catch (IOException e) {
204 throw new XMLParseException(e.toString());
205 }
206 return result;
207 }
208
209 public int escape (ElementParser parser,String message) throws XMLParseException, IOException {
210 //try{throw new Exception("mark");}catch(Exception e){e.printStackTrace();}
211 is_error_ = true;
212 System.err.println("this documents has error: "+message);
213 System.err.println("skip---------------------");
214 int c = parser.get();
215 System.err.print((char)c);
216 while(c!='>' && c!=-1) System.err.print((char)(c=parser.get()));
217 // for(int i=0;i<3000;i++) System.err.print((char)(c=parser.get()));
218 System.err.println("\n-------------------------");
219 return parser.get();
220 }
221
222 public boolean isTagElement() {
223 return (tag_element_!=null);
224 }
225 public boolean isTextElement() {
226 return (text_element_!=null);
227 }
228 public boolean isStartTag() {
229 return is_start_tag_;
230 }
231 public TextElement getTextElement() {
232 return text_element_;
233 }
234 public TagElement getTagElement() {
235 return tag_element_;
236 }
237 public String getEndTagName() {
238 return endtag_name_;
239 }
240 }