001 /*
002 * To change this template, choose Tools | Templates
003 * and open the template in the editor.
004 */
005
006 package org.util.xml.parse.policy;
007
008 import org.util.xml.parse.policy.ParserPolicy;
009 import javax.swing.JOptionPane;
010 import org.util.xml.element.Element;
011 import org.util.xml.element.TagElement;
012
013 /**
014 *
015 * @author masaru
016 */
017 public class HTMLParserPolicy extends DefaultParserPolicy {
018
019 protected String[] forse_empty_tag_list_ = {"br","hr","meta","link","img","input","base","dd","dt","frame","p","pre","li","space"};
020 protected String encoding_ = null;
021
022 public boolean checkEndTagMatch() {
023 return false;
024 }
025 public boolean forceEmptyTag(String key) {
026 for(int i=0;i<forse_empty_tag_list_.length;i++)
027 if(forse_empty_tag_list_[i].equals(key.toLowerCase()))
028 return true;
029 return false;
030 }
031
032 public Element allowElement(Element element) {
033 // JOptionPane.showMessageDialog(null, "check\n"+element);
034 if(encoding_ == null && element.isTagElement()) {
035 TagElement telement = (TagElement)element;
036 if(telement.getKey().toLowerCase().equals("meta")) {
037 if("content-type".equals(telement.getAttributeValue("http-equiv","").toLowerCase())) {
038 String contenttext = telement.getAttributeValue("content");
039 String encoding = null;
040 int point = contenttext.indexOf("charset");
041 if(point != -1) {
042 for(int i=point+"charset".length();i<contenttext.length()&&encoding==null;i++) {
043 char c = contenttext.charAt(i);
044 if(c!=' ' && c!='=')
045 encoding = contenttext.substring(i, contenttext.length());
046 }
047 System.out.println("found encoding: "+ encoding);
048 encoding_ = encoding;
049 }
050 }
051 }
052 }
053 // JOptionPane.showMessageDialog(null, "encoding:"+encoding_);
054 return element;
055 }
056
057 public String selectEncoding(String last_tag_key) {
058 if(last_tag_key!=null && last_tag_key.toLowerCase().equals("body")) {
059 encoding_ = "JISAutoDetect";
060 encoding_ = JOptionPane.showInputDialog("<html>encoding does not defained before reading body tag<br/>select encoding</html>",encoding_);
061 }
062 return encoding_;
063 }
064 }