001 /*
002 * To change this template, choose Tools | Templates
003 * and open the template in the editor.
004 */
005
006 package org.util.xml.parse;
007
008 import org.util.xml.parse.policy.ParserPolicy;
009 import java.io.BufferedReader;
010 import java.io.File;
011 import java.io.InputStream;
012 import java.io.InputStreamReader;
013 import java.io.Reader;
014 import java.io.UnsupportedEncodingException;
015 import java.io.IOException;
016 import java.net.URI;
017 import java.util.ArrayList;
018 import javax.swing.JOptionPane;
019 import org.util.xml.element.Attributes;
020 import org.util.xml.element.Element;
021 import org.util.xml.element.TagElement;
022 import org.util.xml.element.TextElement;
023 import org.util.xml.parse.policy.*;
024 import org.util.xml.parse.XMLParseException;
025
026
027 /**
028 *
029 * @author masaru
030 */
031 public class ElementParser {
032
033 private Reader reader_;
034 protected static ElementPartParser element_part_parser_;
035 private Element[] result_;
036 private ParserPolicy policy_;
037 private int tab_count_;
038 private String encoding_ = "unknown";
039 private boolean select_encoding_after_readeing_first_line_;
040 private InputStream is_;
041 private ElementParser data_source_;
042 private URI document_base_;
043 private ArrayList<ParserPolicy> policy_stack_ = new ArrayList<ParserPolicy>();
044
045 protected ElementParser(){}
046 public ElementParser(Reader reader) {
047 init(reader);
048 }
049 public ElementParser(InputStream is) {
050 init(is);
051 }
052 public ElementParser(InputStream is, String encoding) throws UnsupportedEncodingException {
053 init(new InputStreamReader(is,encoding));
054 }
055 public void setDocumentBase(URI document_base) {
056 document_base_ = document_base;
057 }
058 public URI getDocumentBase() {
059 return document_base_;
060 }
061 public Element[] createSubElements(String sub_path) throws Exception {
062 ElementParser sub_parser = createSubParser(sub_path);
063 sub_parser.parse();
064 return sub_parser.getResult();
065
066 }
067 public ElementParser createSubParser(String sub_path) throws Exception {
068 URI uri = null;
069 if(document_base_ != null)
070 uri = document_base_.resolve(sub_path);
071 else {
072 uri = new File(sub_path).toURI();
073 }
074 ElementParser sub_parser = new ElementParser(uri.toURL().openStream());
075 sub_parser.setDocumentBase(uri.resolve(".."));
076 sub_parser.setPolicy(policy_);
077 return sub_parser;
078 }
079
080 private void initParsers() {
081 if(policy_==null) {
082 policy_ = new DefaultParserPolicy() {
083 String encoding_;
084 public boolean forceEmptyTag(String key) {
085 return false;
086 }
087 public Element allowElement(Element element) {
088 if(encoding_ == null) {
089 if(element.isTagElement()){
090 TagElement te = (TagElement)element;
091 if(te.isPI())
092 encoding_ = te.getAttributeValue("encoding");
093 }
094 if(encoding_ == null) encoding_ = "utf-8";
095 }
096 return element;
097 }
098
099 public String selectEncoding(String last_tag_key) {
100 return encoding_;
101 }
102 };
103 }
104 element_part_parser_ = new ElementPartParser(policy_);
105 }
106
107 public void setPolicy(ParserPolicy policy) {
108 policy_ = policy;
109 initParsers();
110 }
111
112 private void init(InputStream is) {
113 initParsers();
114 select_encoding_after_readeing_first_line_ = true;
115 is_ = is;
116 }
117 private void init(Reader reader){
118 initParsers();
119 if(reader instanceof InputStreamReader){
120 encoding_ = ((InputStreamReader)reader).getEncoding();
121 reader_ = new BufferedReader(reader);
122 }else
123 reader_ = reader;
124 }
125
126 public void error(ParseElement source) {
127
128 System.err.println("error: ");
129 System.err.println(source);
130 try{
131 for(int i=0;i<1000;i++) {
132 System.err.print((char)get());
133 }
134 }catch(Exception e) {}
135 }
136
137
138
139 public Element[] parse() throws IOException, XMLParseException {
140 tab_count_ = 0;
141
142 ArrayList<Element> list = new ArrayList<Element>();
143
144 if (reader_ != null) {
145 data_source_ = this;
146 }
147 else {
148 data_source_ = new ElementParser() {
149 public int get() throws IOException {
150 return is_.read();
151 }
152 };
153 }
154 element_part_parser_.error_text_ = new StringBuffer();
155
156 try {
157 int last = parse(data_source_.get(), list);
158 }
159 catch(XMLParseException e){
160 if(e.getMessage().equals("$cancel"))
161 System.out.println("parse canelled");
162 else
163 throw e;
164 }
165 catch(IOException e){
166 if(e.getMessage().equals("$cancel"))
167 System.out.println("parse canelled");
168 else
169 throw e;
170 }
171 /*if( last == -1)
172 System.out.println("end of stream.(ok)");
173 else
174 System.out.println("! Not end of stream !");*/
175
176 result_ = list.toArray(new Element[]{});
177
178 //for(int i=0;i<list.size();i++)
179 // System.out.println(list.get(i).toString());
180 return result_;
181 }
182
183 private int parse(int next, ArrayList<Element> list) throws XMLParseException, IOException {
184
185 Element element = null;
186
187 while(next>=0) {
188
189 next = element_part_parser_.parse(next, data_source_);
190
191 if(element_part_parser_.is_error_ && policy_.throwExceptionIfDocumentHasError())
192 throw new XMLParseException(element_part_parser_.error_text_.toString());
193
194 if(element_part_parser_.isTextElement()) {
195 TextElement text_element = element_part_parser_.getTextElement();
196 element = policy_.allowElement(text_element);
197
198 } else if(element_part_parser_.isTagElement()){
199
200 TagElement tag_element = element_part_parser_.getTagElement();
201 //System.out.println(tag_element);
202 //JOptionPane.showMessageDialog(null, tag_element.getKey());
203
204 if(reader_ == null) {
205 String encoding = policy_.selectEncoding(tag_element.getKey());
206 if(encoding != null) {
207 //System.out.println("set encoding: "+encoding);
208 encoding_ = encoding;
209 try {
210 reader_ = new BufferedReader(new InputStreamReader(is_, encoding));
211 }
212 catch(UnsupportedEncodingException exc) {
213 throw new XMLParseException(exc.toString());
214 }
215 data_source_ = this;
216 }
217 }
218
219 //System.out.println("\nkey: "+tag_element.getKey());
220 //System.out.println("att: "+tag_element.getAttributes());
221
222 if(element_part_parser_.isStartTag()){
223 //JOptionPane.showMessageDialog(null, "start tag:\n");
224 policy_stack_.add(policy_);
225 policy_ = policy_.getInnerPolicy(tag_element);
226
227 tab_count_++;
228 String start_key = tag_element.getKey();
229 //System.out.println("start tag: "+start_key);
230
231 ArrayList<Element> children = new ArrayList<Element>();
232 next = parse(next, children);
233
234 String end_key = element_part_parser_.getEndTagName();
235 if(policy_.checkEndTag())
236 if(!start_key.equals(end_key) && policy_.throwExceptionIfDocumentHasError()) {
237 String message = "end tag does not match! (start:"+start_key+" end:"+end_key+")";
238 if(element_part_parser_.is_error_)
239 element_part_parser_.error_text_.append(message);
240 else
241 throw new XMLParseException(message);
242 }
243 // throw new Exception("parse error: "+end_key+" does not match "+start_key);
244
245 tag_element.setChildren(children.toArray(new Element[]{}));
246 tab_count_--;
247 //System.out.println("end children :"+tag_element.getKey());
248 if(policy_stack_.size()>0)
249 policy_ = policy_stack_.remove(policy_stack_.size()-1);
250 }
251
252 //JOptionPane.showMessageDialog(null, "add to list:\n"+tag_element.getKey());
253 tag_element.setDocumentBase(getDocumentBase());
254
255 element = policy_.allowElement(tag_element);
256
257 //System.out.println("add-----------------------");
258 } else { // end tag
259 if(!policy_.forceEmptyTag(element_part_parser_.getEndTagName()))
260 return next;
261 element = null;
262 }
263
264 if(element != null) list.add(element);
265
266 next = element_part_parser_.parse(next, data_source_);
267
268 if(element_part_parser_.is_error_ && policy_.throwExceptionIfDocumentHasError())
269 throw new XMLParseException(element_part_parser_.error_text_.toString());
270
271 if(element_part_parser_.isTextElement()) {
272 TextElement text_element = element_part_parser_.getTextElement();
273 element = policy_.allowElement(text_element);
274
275 } else if(element_part_parser_.isTagElement()){
276
277 TagElement tag_element = element_part_parser_.getTagElement();
278 //System.out.println(tag_element);
279 //JOptionPane.showMessageDialog(null, tag_element.getKey());
280
281 if(reader_ == null) {
282 String encoding = policy_.selectEncoding(tag_element.getKey());
283 if(encoding != null) {
284 //System.out.println("set encoding: "+encoding);
285 encoding_ = encoding;
286 try {
287 reader_ = new BufferedReader(new InputStreamReader(is_, encoding));
288 }
289 catch(UnsupportedEncodingException exc) {
290 throw new XMLParseException(exc.toString());
291 }
292 data_source_ = this;
293 }
294 }
295
296 //System.out.println("\nkey: "+tag_element.getKey());
297 //System.out.println("att: "+tag_element.getAttributes());
298
299 if(element_part_parser_.isStartTag()){
300 //JOptionPane.showMessageDialog(null, "start tag:\n");
301 policy_stack_.add(policy_);
302 policy_ = policy_.getInnerPolicy(tag_element);
303 if(policy_.finished())
304 throw new XMLParseException("$cancel");
305
306 tab_count_++;
307 String start_key = tag_element.getKey();
308 //System.out.println("start tag: "+start_key);
309
310 ArrayList<Element> children = new ArrayList<Element>();
311 next = parse(next, children);
312
313 String end_key = element_part_parser_.getEndTagName();
314 if(policy_.checkEndTag())
315 if(!start_key.equals(end_key) && policy_.throwExceptionIfDocumentHasError()) {
316 String message = "end tag does not match! (start:"+start_key+" end:"+end_key+")";
317 if(element_part_parser_.is_error_)
318 element_part_parser_.error_text_.append(message);
319 else
320 throw new XMLParseException(message);
321 }
322 // throw new Exception("parse error: "+end_key+" does not match "+start_key);
323
324 tag_element.setChildren(children.toArray(new Element[]{}));
325 tab_count_--;
326 //System.out.println("end children :"+tag_element.getKey());
327 if(policy_stack_.size()>0)
328 policy_ = policy_stack_.remove(policy_stack_.size()-1);
329
330 if(policy_.finished())
331 throw new XMLParseException("$cancel");
332 }
333
334 //JOptionPane.showMessageDialog(null, "add to list:\n"+tag_element.getKey());
335 tag_element.setDocumentBase(getDocumentBase());
336
337 element = policy_.allowElement(tag_element);
338
339 //System.out.println("add-----------------------");
340 } else { // end tag
341 if(!policy_.forceEmptyTag(element_part_parser_.getEndTagName()))
342 return next;
343 element = null;
344 }
345
346 if(element != null) list.add(element);
347
348 if(policy_.finished())
349 throw new XMLParseException("$cancel");
350 }
351 return -1;
352 }
353
354
355 public int escape (String message) throws XMLParseException, IOException {
356 int next = -1;
357 try { throw new Exception("mark");}catch(Exception e){e.printStackTrace();}
358 System.err.println("this documents has error: "+message);
359 System.err.println("skip---------------------");
360 int c = get();
361 System.err.print((char)c);
362 while(c!='>' && c!=-1) System.err.print((char)(c=get()));
363 // for(int i=0;i<3000;i++) System.err.print((char)(c=parser.get()));
364 System.err.println("\n-------------------------");
365 return get();
366 }
367
368 public Element[] getResult() {
369 return result_;
370 }
371 public TagElement getFirstPlainTagElement() {
372 for(Element tmp : result_)
373 if(tmp.isTagElement()) {
374 TagElement tag = (TagElement)tmp;
375 if(!tag.isPI())
376 return tag;
377 }
378 return null;
379 }
380
381 public String getEncoding() {
382 return encoding_;
383 }
384
385 int counter = 0;
386 long start = System.currentTimeMillis();
387 public int get() throws IOException {
388 return reader_.read();
389
390 // int val = reader_.read();
391 // counter++;
392 //System.out.print("["+(char)val+"]");
393 // return val;
394 }
395 public char getChar() throws IOException {
396 int b = get();
397 if(b==-1) throw new IOException("end of stream.");
398 return (char)b;
399 }
400 }