'tera',
'doctype' => 'html',
'xhtml' => False,
'nl' => "\r\n",
'empty_tag_end' => ''
);
//*********************************************************************
// regular expression
// Regular expressions used by the parser, the command interpreter and the builder.
// Sub-patterns are named because the methods of this class read the matches by
// name ($match[ 'doctype' ], $match[ 'tag' ], $match[ 'attributes' ], ...).
private $__re = array(
    // First line break of any style; group 1 is the break itself.
    'nl' => '/(\r\n|\r|\n)/s',
    // A br tag in HTML or XHTML spelling (converted back to a line break by htmlUnEscape).
    'br' => '/<br\s*\/?>/si',
    // UTF-8 byte order mark at the very start.
    'bom' => '/\A\xef\xbb\xbf/s',
    // Leading XML declaration.
    'xml' => '/\A<\?xml\s+.*?\?>\s*/s',
    // Leading DOCTYPE: its content and the markup that follows.
    'doctype' => '/\A<\!DOCTYPE\s+(?P<doctype>[^>]+)>\s*(?P<html>.*)\z/si',
    'doctype_xhtml' => '/\Axhtml(\s+[^>]*)?\z/si',
    // NOTE(review): this pattern matches any input, so the "No html tag." check in
    // process() can never fire; presumably it once required an html start tag - confirm.
    'html' => '/\A.*\z/s',
    // Text up to the next comment, CDATA section or tag, that construct, and the rest.
    'parse' => '/\A(?P<text>.*?)(?P<tag>(<\!--.*?-->|<\!\[CDATA\[.*?\]\]>|<.*?>))(?P<html>.*)\z/s',
    'parse_comment' => '/\A<\!--(.*)-->\z/s',
    // The brackets are escaped; unescaped they would form a character class and
    // a CDATA section would never be recognised.
    'parse_cdata' => '/\A<\!\[CDATA\[(.*)\]\]>\z/s',
    'parse_tag_end' => '/\A<\/\s*(?P<tagname>\S+)\s*>\z/s',
    'parse_tag_start' => '/\A<(?P<tagname>\S+)(?P<attributes>.*?)(?P<end>\/?)>\z/s',
    // Raw-text elements: everything up to the matching end tag, then the rest.
    'parse_script' => '/\A(?P<text>.*?)<\/\s*script\s*>(?P<html>.*)\z/si',
    'parse_style' => '/\A(?P<text>.*?)<\/\s*style\s*>(?P<html>.*)\z/si',
    'parse_textarea' => '/\A(?P<text>.*?)<\/\s*textarea\s*>(?P<html>.*)\z/si',
    // name=value (double-quoted, single-quoted or bare) followed by the remaining attributes.
    'parse_attribute_value' => '/\A\s*(?P<name>\S+?)\s*=\s*(?P<value>"[^"]*"|\'[^\']*\'|\S+)\s*(?P<attributes>.*)\z/s',
    'parse_value_quote' => '/\A"(.*)"\z/s',
    'parse_value_apos' => '/\A\'(.*)\'\z/s',
    // Value-less attribute followed by the remaining attributes.
    'parse_attribute_name' => '/\A\s*(?P<name>\S+)(?P<attributes>.*)\z/s',
    // Separator between commands inside the template attribute.
    'replace_commands' => '/\s*;\s*/s',
    // "[@]name: value" command.
    'replace_commands_value' => '/\A\s*(?P<attribute>@?)(?P<name>\S+)\s*:\s*(?P<value>\S+)\s*\z/s',
    // Used with the replacement '\1\3' (groups 1 and 3 are the two named groups) to
    // drop one whitespace-only line. The group names are not read anywhere in view.
    'replace_blank' => '/\A(?P<before>.*?)(\r\n|\r|\n)[ \t]*(?P<after>(\r\n|\r|\n).*)\z/s',
    // Trailing "line break + indentation" in front of a list element.
    'replace_list' => '/\A(?P<before>.*?)(?P<indent>(\r\n|\r|\n)[ \t]*)\z/s',
    'replace_text_br' => '/(\r\n|\r|\n)/s',
    // "url( a, b )" form of the inner/outer commands.
    'replace_inner_url' => '/\A\s*url\(\s*(?P<url>[^()]*)\s*\)\s*\z/s',
    'replace_inner_url_split' => '/(\s+|\s*,\s*)/s'
);
//*********************************************************************
// HTML
/**
 * Escape text so it can be written into HTML markup.
 *
 * @param string $text Raw text.
 * @param bool   $br   When true, each line break is additionally replaced by a br tag.
 * @return string Escaped text.
 */
private function htmlEscape( $text, $br = false ) {
    // "&" is replaced first so the entities produced for the other characters
    // are not escaped a second time.
    $text = str_replace(
        array( '&', '"', '<', '>' ),
        array( '&amp;', '&quot;', '&lt;', '&gt;' ),
        $text
    );
    if ( ! $br ) {
        return $text;
    }
    // The XHTML flag lives in the settings array (as read by __build_attributes),
    // not in a property of its own.
    $tag = $this->setting[ 'xhtml' ] ? '<br />' : '<br>';
    return preg_replace( $this->__re[ 'nl' ], $tag, $text );
}
/**
 * Reverse htmlEscape(): turn the basic character entities back into characters.
 *
 * @param string $text Escaped text.
 * @param bool   $br   When true, br tags are first replaced by the configured line break.
 * @return string Unescaped text.
 */
private function htmlUnEscape( $text, $br = false ) {
    if ( $br ) {
        $text = preg_replace( $this->__re[ 'br' ], $this->setting[ 'nl' ], $text );
    }
    // "&amp;" is decoded last so that e.g. "&amp;lt;" yields the text "&lt;"
    // rather than "<". The apostrophe is accepted in its common spellings.
    return str_replace(
        array( '&gt;', '&lt;', '&quot;', '&#39;', '&#039;', '&apos;', '&amp;' ),
        array( '>', '<', '"', "'", "'", "'", '&' ),
        $text
    );
}
//*********************************************************************
// __construct
/**
 * Constructor. The body is empty: it takes no arguments and performs no work.
 */
public function __construct() {
}
//*********************************************************************
// Process
/**
 * Render a complete document template.
 *
 * Detects the line-break style, strips a BOM / XML declaration, extracts the
 * DOCTYPE, parses the markup into a DOM tree, applies the template commands
 * with $data and serialises the tree again.
 *
 * @param string     $html    Template source.
 * @param array      $data    Values referenced by the template commands.
 * @param array|null $setting Optional settings merged over the current ones.
 * @return string|null Rendered document; null after "No html tag." was printed.
 */
public function process( $html, $data, $setting = null ) {
    // Reuse the template's own line-break style for generated output.
    if ( preg_match( $this->__re[ 'nl' ], $html, $match ) ) {
        $this->setting[ 'nl' ] = $match[ 1 ];
    }
    $html = preg_replace( $this->__re[ 'bom' ], "", $html );
    $html = preg_replace( $this->__re[ 'xml' ], "", $html );
    if ( preg_match( $this->__re[ 'doctype' ], $html, $match ) ) {
        $this->setting[ 'doctype' ] = $match[ 'doctype' ];
        $html = $match[ 'html' ];
    }
    if ( preg_match( $this->__re[ 'doctype_xhtml' ], $this->setting[ 'doctype' ] ) ) {
        $this->setting[ 'xhtml' ] = true;
        $this->setting[ 'empty_tag_end' ] = "/";
    }
    // Explicit settings override whatever was detected above.
    if ( is_array( $setting ) ) {
        $this->setting = array_merge( $this->setting, $setting );
    }
    if ( ! preg_match( $this->__re[ 'html' ], $html ) ) {
        print "No html tag.";
        return;
    }
    $document = new DOMDocument( '1.0', 'UTF-8' );
    $this->__parse( $document, $document, $html );
    // Without a root element there is nothing to replace or build; report it
    // the same way as a missing html tag instead of failing on null.
    if ( $document->documentElement === null ) {
        print "No html tag.";
        return;
    }
    $this->__replace( $document, $document->documentElement, $data );
    $build = '';
    if ( $this->setting[ 'doctype' ] ) {
        $build .= "<!DOCTYPE {$this->setting[ 'doctype' ]}>{$this->setting[ 'nl' ]}";
    }
    $build .= $this->__build( $document->documentElement );
    return $build;
}
/**
 * Render a template fragment (markup without a single root element).
 *
 * @param string     $html    Fragment source.
 * @param array      $data    Values referenced by the template commands.
 * @param array|null $setting Optional settings merged over the current ones.
 * @return string Rendered fragment.
 */
public function process_part( $html, $data, $setting = null ) {
    if ( is_array( $setting ) ) {
        $this->setting = array_merge( $this->setting, $setting );
    }
    // The fragment is wrapped in a temporary root element so the DOM has a
    // documentElement; only that element's children are serialised below, so
    // the wrapper never reaches the output.
    // NOTE(review): the wrapper tag name is an assumption - confirm against the upstream source.
    $html = "<html>{$html}</html>";
    $document = new DOMDocument( '1.0', 'UTF-8' );
    $this->__parse( $document, $document, $html );
    $this->__replace( $document, $document->documentElement, $data );
    return $this->__build_childrens( $document->documentElement );
}
//*********************************************************************
// Parse
/**
 * Parse markup from $html and append the resulting nodes to $parentElement.
 *
 * $html is taken by reference and shortened as input is consumed, so when a
 * recursive call returns at an end tag its caller continues with the remainder.
 *
 * @param DOMDocument $document      Used to create the nodes.
 * @param DOMNode     $parentElement Node that receives the parsed children.
 * @param string      $html          Remaining markup (modified in place).
 * @param string      $open          Lower-cased name of the element being filled; '' at the top level.
 * @return string|null Nothing when an end tag ends the call; otherwise $html, which is '' by then.
 */
private function __parse( $document, $parentElement, &$html, $open = '' ) {
while ( $html !== '' ) {
// No further tag, comment or CDATA section: the rest is plain text.
if ( ! preg_match( $this->__re[ 'parse' ], $html, $match ) ) {
$parentElement->appendChild( $document->createTextNode( $html ) );
$html = '';
continue;
}
// Text in front of the construct (stored as the raw string, entities untouched)
if ( $match[ 'text' ] !== "" ) {
$parentElement->appendChild( $document->createTextNode( $match[ 'text' ] ) );
}
// $match is reused by the checks below, so keep what is needed from it first.
$tag = $match[ 'tag' ];
$html = $match[ 'html' ];
// Comment
if ( preg_match( $this->__re[ 'parse_comment' ], $tag, $match ) ) {
$parentElement->appendChild( $document->createComment( $match[ 1 ] ) );
continue;
}
// CDATA
if ( preg_match( $this->__re[ 'parse_cdata' ], $tag, $match ) ) {
$parentElement->appendChild( $document->createCDATASection( $match[ 1 ] ) );
continue;
}
// End tag: report a mismatch, then hand control back to the caller either way
if ( preg_match( $this->__re[ 'parse_tag_end' ], $tag, $match ) ) {
$tagName = strtolower( $match[ 'tagname' ] );
if ( ( $open == '' ) || ( $open != $tagName ) ) {
printf( "Not opened or invalid %s - %s", $open, $tagName );
}
return;
}
// Start tag
if ( preg_match( $this->__re[ 'parse_tag_start' ], $tag, $match ) ) {
$tagName = strtolower( $match[ 'tagname' ] );
$attributes = $match[ 'attributes' ];
// Closed when written self-closing or when the name is listed in $this->__tag_empty.
$closed = ( $match[ 'end' ] == '/' ) ? true : in_array( $tagName, $this->__tag_empty );
$element = $document->createElement( $tagName );
$parentElement->appendChild( $element );
$this->__parse_attribute( $element, $attributes );
if ( $closed ) {
continue;
}
// script, style and textarea content is not parsed as markup: everything up to
// the matching end tag (or to the end of input) becomes a single text node.
if ( in_array( $tagName, array( 'script', 'style', 'textarea' ) ) ) {
if ( preg_match( $this->__re[ "parse_{$tagName}" ], $html, $match ) ) {
if ( $match[ 'text' ] !== '' ) {
$element->appendChild( $document->createTextNode( $match[ 'text' ] ) );
}
$html = $match[ 'html' ];
} else {
$element->appendChild( $document->createTextNode( $html ) );
$html = '';
}
continue;
}
// Any other element: parse its content recursively until its end tag.
$this->__parse( $document, $element, $html, $tagName );
continue;
}
// A "<...>" that is none of the above (e.g. "<" followed by whitespace) is kept as text
$parentElement->appendChild( $document->createTextNode( $tag ) );
}
return $html;
}
/**
 * Parse the attribute part of a start tag and set the attributes on $element.
 *
 * Attribute names are lower-cased. A value-less attribute gets its own name as
 * value (e.g. disabled="disabled").
 *
 * @param DOMElement $element    Element receiving the attributes.
 * @param string     $attributes Raw text between the tag name and the closing ">".
 * @return void
 */
private function __parse_attribute( $element, $attributes ) {
    // Whitespace around the attribute list (such as the blank in "<br />")
    // carries no attribute and must not be reported as an ignored one. Trimming
    // once is enough: every later remainder is a suffix of the trimmed string.
    $attributes = trim( $attributes );
    while ( $attributes !== '' ) {
        // name=value
        if ( preg_match( $this->__re[ 'parse_attribute_value' ], $attributes, $match ) ) {
            $name = strtolower( $match[ 'name' ] );
            $value = $this->__parse_attribute_value( $name, $match[ 'value' ] );
            $attributes = $match[ 'attributes' ];
            $element->setAttribute( $name, $value );
            continue;
        }
        // name only
        if ( preg_match( $this->__re[ 'parse_attribute_name' ], $attributes, $match ) ) {
            $name = strtolower( $match[ 'name' ] );
            $attributes = $match[ 'attributes' ];
            $element->setAttribute( $name, $name );
            continue;
        }
        // Defensive fallback: give up on whatever could not be parsed.
        printf( "Ignore attribute '%s', '%s'.", $element->tagName, $attributes );
        $attributes = '';
    }
}
/**
 * Normalise a raw attribute value taken from the markup.
 *
 * Strips one pair of surrounding double or single quotes. Values of attributes
 * listed in $this->__attribute_url are returned as written; all others are
 * passed through htmlUnEscape().
 *
 * @param string $name  Lower-cased attribute name.
 * @param string $value Raw value, possibly quoted.
 * @return string
 */
private function __parse_attribute_value( $name, $value ) {
    // Double quotes are tried first; the first pattern that matches wins.
    foreach ( array( 'parse_value_quote', 'parse_value_apos' ) as $pattern ) {
        if ( preg_match( $this->__re[ $pattern ], $value, $unquoted ) ) {
            $value = $unquoted[ 1 ];
            break;
        }
    }
    // save raw string
    if ( in_array( $name, $this->__attribute_url ) ) {
        return $value;
    }
    return $this->htmlUnEscape( $value );
}
//*********************************************************************
// Replace
/**
 * Apply the template commands found on $element, then recurse into its child elements.
 *
 * Order of evaluation: remove / condition / condition_not (drop the element),
 * list (repeat the element once per row), outer / inner (replace the content
 * with markup), text_br / text (replace the content with escaped text),
 * recursion into children, placeholder substitution inside script elements,
 * and finally wrap (replace the element by its children) or "@" attribute assignment.
 *
 * @param DOMDocument $document Document that owns $element.
 * @param DOMElement  $element  Element to process.
 * @param array       $data     Values referenced by the commands.
 * @return void
 */
private function __replace( $document, $element, $data ) {
    $commands = $this->__replace_get_commands( $element );
    // remove(dummy, fake), condition(if), condition_not(else)
    $skip = false;
    if ( $commands[ 'remove' ] ) {
        $skip = true;
    } elseif ( $commands[ 'condition' ] ) {
        // Kept only when the key exists and is truthy.
        $skip = isset( $data[ $commands[ 'condition' ] ] ) ? ( ! $data[ $commands[ 'condition' ] ] ) : true;
    } elseif ( $commands[ 'condition_not' ] ) {
        // Dropped only when the key exists and is truthy.
        $skip = isset( $data[ $commands[ 'condition_not' ] ] ) ? $data[ $commands[ 'condition_not' ] ] : false;
    }
    if ( $skip ) {
        $parentNode = $element->parentNode;
        $previousSibling = $element->previousSibling;
        $nextSibling = $element->nextSibling;
        $parentNode->removeChild( $element );
        // Merge the text on both sides and drop the whitespace-only line the
        // removed element leaves behind.
        if ( $previousSibling && ( $previousSibling->nodeType == XML_TEXT_NODE ) ) {
            if ( $nextSibling && ( $nextSibling->nodeType == XML_TEXT_NODE ) ) {
                $parentNode->removeChild( $previousSibling );
                $nextSibling->nodeValue = preg_replace( $this->__re[ 'replace_blank' ], '\1\3', $previousSibling->nodeValue . $nextSibling->nodeValue );
            }
        }
        return;
    }
    if ( $commands[ 'list' ] ) {
        $cloneNodes = array( $element );
        $parentNode = $element->parentNode;
        $previousSibling = $element->previousSibling;
        // A temporary "__" element marks the position where the clones are inserted.
        $anchor = $document->createElement( '__' );
        $parentNode->insertBefore( $anchor, $element );
        $parentNode->removeChild( $element );
        // Move the "line break + indentation" in front of the element into the
        // clone set so every repetition starts on its own, equally indented line.
        if ( $previousSibling && ( $previousSibling->nodeType == XML_TEXT_NODE ) ) {
            if ( preg_match( $this->__re[ 'replace_list' ], $previousSibling->nodeValue, $match ) ) {
                $previousSibling->nodeValue = $match[ 'before' ];
                $cloneNodes = array(
                    $document->createTextNode( $match[ 'indent' ] ),
                    $element
                );
            }
        }
        if ( isset( $data[ $commands[ 'list' ] ] ) ) {
            if ( is_array( $data[ $commands[ 'list' ] ] ) ) {
                // Rows are addressed as 0 .. count-1 and each row is used as an array.
                for ( $l = 0; $l < count( $data[ $commands[ 'list' ] ] ); $l++ ) {
                    // Each row inherits every outer value it does not define itself.
                    foreach ( $data as $key => $val ) {
                        if ( $key == $commands[ 'list' ] ) continue;
                        if ( array_key_exists( $key, $data[ $commands[ 'list' ] ][ $l ] ) ) continue;
                        $data[ $commands[ 'list' ] ][ $l ][ $key ] = $val;
                    }
                    for ( $n = 0; $n < count( $cloneNodes ); $n++ ) {
                        $parentNode->insertBefore( $cloneNodes[ $n ]->cloneNode( true ), $anchor );
                        if ( $anchor->previousSibling->nodeType == XML_ELEMENT_NODE ) {
                            $this->__replace( $document, $anchor->previousSibling, $data[ $commands[ 'list' ] ][ $l ] );
                        }
                    }
                }
            }
        }
        $parentNode->removeChild( $anchor );
        return;
    }
    if ( $commands[ 'outer' ] ) {
        // outer = inner + wrap: the content is replaced and the element itself removed.
        $commands[ 'wrap' ] = true;
        $this->__replace_command_inner( $document, $element, $commands[ 'outer' ], $data );
    }
    if ( $commands[ 'inner' ] ) {
        $this->__replace_command_inner( $document, $element, $commands[ 'inner' ], $data );
    }
    if ( $commands[ 'text_br' ] ) {
        while ( $element->hasChildNodes() ) {
            $element->removeChild( $element->firstChild );
        }
        if ( isset( $data[ $commands[ 'text_br' ] ] ) ) {
            // Escaped text with br tags is parsed so the br tags become elements.
            $text = $this->htmlEscape( $data[ $commands[ 'text_br' ] ], true );
            $this->__parse( $document, $element, $text );
        }
    }
    if ( $commands[ 'text' ] ) {
        while ( $element->hasChildNodes() ) {
            $element->removeChild( $element->firstChild );
        }
        if ( isset( $data[ $commands[ 'text' ] ] ) ) {
            // Text nodes are written to the output verbatim by __build_childrens(),
            // so the value has to be escaped here, exactly as text_br does;
            // otherwise "<" or "&" in the data would be emitted as markup.
            $element->appendChild( $document->createTextNode( $this->htmlEscape( $data[ $commands[ 'text' ] ] ) ) );
        }
    }
    // Snapshot the children first: processing a child may remove or replace it.
    $childrens = array();
    for ( $i = 0; $i < $element->childNodes->length; $i++ ) {
        $childrens[] = $element->childNodes->item( $i );
    }
    foreach ( $childrens as $children ) {
        if ( $children->nodeType == XML_ELEMENT_NODE ) {
            $this->__replace( $document, $children, $data );
        }
    }
    // script: substitute "<ATTRIBUTE key>" placeholders in the script text with
    // $data[ key ]; placeholders without a value are removed.
    if ( $element->tagName == 'script' ) {
        for ( $i = 0; $i < $element->childNodes->length; $i++ ) {
            $children = $element->childNodes->item( $i );
            if ( $children->nodeType != XML_TEXT_NODE ) continue;
            $text = $children->nodeValue;
            if ( $text == "" ) continue;
            $build = '';
            $regexp = sprintf( '/\A(.*?)<%s\s+(\S+?)>(.*)\z/s', preg_quote( $this->setting[ 'attribute' ], '/' ) );
            while ( preg_match( $regexp, $text, $match ) ) {
                $build .= $match[ 1 ];
                if ( isset( $data[ $match[ 2 ] ] ) ) $build .= $data[ $match[ 2 ] ];
                $text = $match[ 3 ];
            }
            $children->nodeValue = $build . $text;
        }
    }
    // wrap: replace the element by its children
    if ( $commands[ 'wrap' ] ) {
        $parentNode = $element->parentNode;
        $previousSibling = $element->previousSibling;
        $firstChild = $element->firstChild;
        $lastChild = $element->lastChild;
        $nextSibling = $element->nextSibling;
        while ( $element->hasChildNodes() ) {
            $parentNode->insertBefore( $element->firstChild, $element );
        }
        $parentNode->removeChild( $element );
        // Join the text nodes that now touch each other on either side and drop
        // the whitespace-only line left by the removed start / end tag.
        if ( $previousSibling and ( $previousSibling->nodeType == XML_TEXT_NODE ) ) {
            if ( $firstChild and ( $firstChild->nodeType == XML_TEXT_NODE ) ) {
                $parentNode->removeChild( $previousSibling );
                $firstChild->nodeValue = preg_replace( $this->__re[ 'replace_blank' ], '\1\3', $previousSibling->nodeValue . $firstChild->nodeValue );
            }
        }
        if ( $lastChild and ( $lastChild->nodeType == XML_TEXT_NODE ) ) {
            if ( $nextSibling and ( $nextSibling->nodeType == XML_TEXT_NODE ) ) {
                $parentNode->removeChild( $lastChild );
                $nextSibling->nodeValue = preg_replace( $this->__re[ 'replace_blank' ], '\1\3', $lastChild->nodeValue . $nextSibling->nodeValue );
            }
        }
    } else {
        // @ (attribute): only scalar values that are set are assigned
        if ( $commands[ 'attributes' ] ) {
            foreach ( $commands[ 'attributes' ] as $attribute ) {
                if ( isset( $data[ $attribute[ 'value' ] ] ) ) {
                    if ( is_array( $data[ $attribute[ 'value' ] ] ) ) {
                        echo "Array -- @{$attribute[ 'name' ]}: {$attribute[ 'value' ]}";
                    } else {
                        $element->setAttribute( $attribute[ 'name' ], $data[ $attribute[ 'value' ] ] );
                    }
                }
            }
        }
    }
}
/**
 * Read the template attribute of $element and translate it into a command set.
 *
 * The attribute value is a ";"-separated list of "name: value" commands,
 * "@attribute: value" assignments and the bare keywords remove / fake / dummy / wrap.
 * As a side effect the attribute is rewritten without its list / record
 * command, so clones made while expanding a list do not expand it again.
 *
 * @param DOMElement $element Element to inspect.
 * @return array Command set; every key is always present.
 */
private function __replace_get_commands( $element ) {
    $commands = array(
        'remove' => false,
        'condition' => '',
        'condition_not' => '',
        'list' => '',
        'wrap' => false,
        'outer' => '',
        'inner' => '',
        'text' => '',
        'text_br' => '',
        'attributes' => []
    );
    $attributeName = $this->setting[ 'attribute' ];
    if ( ! $element->hasAttribute( $attributeName ) ) {
        return $commands;
    }
    // Command name as written in the template => key in $commands.
    $aliases = array(
        'condition' => 'condition',
        'if' => 'condition',
        'condition_not' => 'condition_not',
        'else' => 'condition_not',
        'inner' => 'inner',
        'html' => 'inner',
        'outer' => 'outer',
        'text' => 'text',
        'text_br' => 'text_br',
        'textarea' => 'text'
    );
    $values = preg_split( $this->__re[ 'replace_commands' ], $element->getAttribute( $attributeName ) );
    $without_list = array();
    foreach ( $values as $value ) {
        // Trim once for both branches, so that " remove" or "wrap " is recognised
        // just like a padded "name: value" command; skip the empty segment
        // produced by a trailing ";".
        $value = trim( $value );
        if ( $value === '' ) {
            continue;
        }
        if ( preg_match( $this->__re[ 'replace_commands_value' ], $value, $match ) ) {
            if ( $match[ 'attribute' ] == '@' ) {
                $without_list[] = $value;
                $commands[ 'attributes' ][] = array( 'name' => $match[ 'name' ], 'value' => $match[ 'value' ] );
                continue;
            }
            // list / record is the only command that is not written back.
            if ( in_array( $match[ 'name' ], array( 'list', 'record' ), true ) ) {
                $commands[ 'list' ] = $match[ 'value' ];
                continue;
            }
            $without_list[] = $value;
            if ( isset( $aliases[ $match[ 'name' ] ] ) ) {
                $commands[ $aliases[ $match[ 'name' ] ] ] = $match[ 'value' ];
            }
            continue;
        }
        $without_list[] = $value;
        if ( in_array( $value, array( 'remove', 'fake', 'dummy' ), true ) ) {
            $commands[ 'remove' ] = true;
            continue;
        }
        if ( $value === 'wrap' ) {
            $commands[ 'wrap' ] = true;
        }
    }
    $element->setAttribute( $attributeName, implode( "; ", $without_list ) );
    return $commands;
}
/**
 * Replace the content of $element with markup.
 *
 * $inner is either "url( a, b, ... )", in which case the listed sources are
 * read, joined with the configured line break and parsed, or a key of $data
 * whose value is parsed as markup. The existing children are removed in
 * every case.
 *
 * @param DOMDocument $document Document that owns $element.
 * @param DOMElement  $element  Element whose content is replaced.
 * @param string      $inner    Command value.
 * @param array       $data     Template values.
 * @return void
 */
private function __replace_command_inner( $document, $element, $inner, $data ) {
    while ( ( $child = $element->firstChild ) !== null ) {
        $element->removeChild( $child );
    }
    // Plain key: parse the data value, if there is one.
    if ( ! preg_match( $this->__re[ 'replace_inner_url' ], $inner, $match ) ) {
        if ( isset( $data[ $inner ] ) ) {
            $this->__parse( $document, $element, $data[ $inner ] );
        }
        return;
    }
    // url( ... ): collect every source that can be read, without its BOM.
    $parts = array();
    foreach ( preg_split( $this->__re[ 'replace_inner_url_split' ], $match[ 'url' ] ) as $url ) {
        $url = trim( $url );
        if ( $url == '' ) {
            continue;
        }
        // Unreadable sources are skipped silently.
        $part = @file_get_contents( $url );
        if ( $part === false ) {
            continue;
        }
        $parts[] = preg_replace( $this->__re[ 'bom' ], "", $part );
    }
    if ( count( $parts ) == 0 ) {
        return;
    }
    $inner = implode( $this->setting[ 'nl' ], $parts );
    $this->__parse( $document, $element, $inner );
}
//*********************************************************************
// Build
/**
 * Serialise an element, its attributes and its content to markup.
 *
 * Elements listed in $this->__tag_empty are written as a single tag, closed
 * with the configured 'empty_tag_end'; all others get a start tag, their
 * serialised children and a matching end tag.
 *
 * @param DOMElement $element Element to serialise.
 * @return string
 */
private function __build( $element ) {
    $name = $element->tagName;
    $attributes = $this->__build_attributes( $element );
    if ( in_array( $name, $this->__tag_empty ) ) {
        return sprintf( '<%s%s%s>', $name, $attributes, $this->setting[ 'empty_tag_end' ] );
    }
    // The last placeholder is the end tag "</name>".
    return sprintf( '<%s%s>%s</%s>', $name, $attributes, $this->__build_childrens( $element ), $name );
}
/**
 * Serialise all child nodes of $element.
 *
 * Elements are built recursively, comments and CDATA sections are written
 * with their delimiters, and every other node (text) is emitted with its
 * value as stored.
 *
 * @param DOMNode $element Parent node.
 * @return string
 */
private function __build_childrens( $element ) {
    $build = '';
    for ( $c = 0; $c < $element->childNodes->length; $c++ ) {
        $children = $element->childNodes->item( $c );
        if ( $children->nodeType == XML_ELEMENT_NODE ) {
            $build .= $this->__build( $children );
            continue;
        }
        if ( $children->nodeType == XML_COMMENT_NODE ) {
            $build .= "<!--{$children->nodeValue}-->";
            continue;
        }
        if ( $children->nodeType == XML_CDATA_SECTION_NODE ) {
            $build .= "<![CDATA[{$children->nodeValue}]]>";
            continue;
        }
        // Text is written verbatim; no escaping happens here.
        $build .= $children->nodeValue;
    }
    return $build;
}
/**
 * Serialise the attributes of $element, each preceded by one space.
 *
 * The template attribute itself is never written. Attributes listed in
 * $this->__attribute_blank are written by name only (name="name" in XHTML
 * mode) and only when their value is non-empty. Other attributes are written
 * when non-empty or listed in $this->__attribute_empty_ok; their value is
 * escaped unless the attribute is listed in $this->__attribute_url.
 *
 * @param DOMElement $element Element whose attributes are serialised.
 * @return string Empty string when nothing is written.
 */
private function __build_attributes( $element ) {
    $written = array();
    foreach ( $element->attributes as $node ) {
        $name = $node->name;
        if ( $name == $this->setting[ 'attribute' ] ) {
            continue;
        }
        $isEmpty = ( $node->value === '' );
        if ( in_array( $name, $this->__attribute_blank ) ) {
            if ( ! $isEmpty ) {
                $written[] = $this->setting[ 'xhtml' ] ? "{$name}=\"{$name}\"" : $name;
            }
            continue;
        }
        if ( $isEmpty && ! in_array( $name, $this->__attribute_empty_ok ) ) {
            continue;
        }
        if ( in_array( $name, $this->__attribute_url ) ) {
            $value = $node->value;
        } else {
            $value = $this->htmlEscape( $node->value );
        }
        $written[] = "{$name}=\"{$value}\"";
    }
    if ( count( $written ) == 0 ) {
        return '';
    }
    return ' ' . implode( ' ', $written );
}
}