include/HTMLPurifier/standalone/HTMLPurifier/Lexer/PH5P.php
\HTML5
Constants
Properties

$entities= 'array('AElig;','AElig','AMP;','AMP','Aacute;','Aacute',
'Acirc;','Acirc','Agrave;','Agrave','Alpha;','Aring;','Aring','Atilde;',
'Atilde','Auml;','Auml','Beta;','COPY;','COPY','Ccedil;','Ccedil','Chi;',
'Dagger;','Delta;','ETH;','ETH','Eacute;','Eacute','Ecirc;','Ecirc','Egrave;',
'Egrave','Epsilon;','Eta;','Euml;','Euml','GT;','GT','Gamma;','Iacute;',
'Iacute','Icirc;','Icirc','Igrave;','Igrave','Iota;','Iuml;','Iuml','Kappa;',
'LT;','LT','Lambda;','Mu;','Ntilde;','Ntilde','Nu;','OElig;','Oacute;',
'Oacute','Ocirc;','Ocirc','Ograve;','Ograve','Omega;','Omicron;','Oslash;',
'Oslash','Otilde;','Otilde','Ouml;','Ouml','Phi;','Pi;','Prime;','Psi;',
'QUOT;','QUOT','REG;','REG','Rho;','Scaron;','Sigma;','THORN;','THORN',
'TRADE;','Tau;','Theta;','Uacute;','Uacute','Ucirc;','Ucirc','Ugrave;',
'Ugrave','Upsilon;','Uuml;','Uuml','Xi;','Yacute;','Yacute','Yuml;','Zeta;',
'aacute;','aacute','acirc;','acirc','acute;','acute','aelig;','aelig',
'agrave;','agrave','alefsym;','alpha;','amp;','amp','and;','ang;','apos;',
'aring;','aring','asymp;','atilde;','atilde','auml;','auml','bdquo;','beta;',
'brvbar;','brvbar','bull;','cap;','ccedil;','ccedil','cedil;','cedil',
'cent;','cent','chi;','circ;','clubs;','cong;','copy;','copy','crarr;',
'cup;','curren;','curren','dArr;','dagger;','darr;','deg;','deg','delta;',
'diams;','divide;','divide','eacute;','eacute','ecirc;','ecirc','egrave;',
'egrave','empty;','emsp;','ensp;','epsilon;','equiv;','eta;','eth;','eth',
'euml;','euml','euro;','exist;','fnof;','forall;','frac12;','frac12',
'frac14;','frac14','frac34;','frac34','frasl;','gamma;','ge;','gt;','gt',
'hArr;','harr;','hearts;','hellip;','iacute;','iacute','icirc;','icirc',
'iexcl;','iexcl','igrave;','igrave','image;','infin;','int;','iota;',
'iquest;','iquest','isin;','iuml;','iuml','kappa;','lArr;','lambda;','lang;',
'laquo;','laquo','larr;','lceil;','ldquo;','le;','lfloor;','lowast;','loz;',
'lrm;','lsaquo;','lsquo;','lt;','lt','macr;','macr','mdash;','micro;','micro',
'middot;','middot','minus;','mu;','nabla;','nbsp;','nbsp','ndash;','ne;',
'ni;','not;','not','notin;','nsub;','ntilde;','ntilde','nu;','oacute;',
'oacute','ocirc;','ocirc','oelig;','ograve;','ograve','oline;','omega;',
'omicron;','oplus;','or;','ordf;','ordf','ordm;','ordm','oslash;','oslash',
'otilde;','otilde','otimes;','ouml;','ouml','para;','para','part;','permil;',
'perp;','phi;','pi;','piv;','plusmn;','plusmn','pound;','pound','prime;',
'prod;','prop;','psi;','quot;','quot','rArr;','radic;','rang;','raquo;',
'raquo','rarr;','rceil;','rdquo;','real;','reg;','reg','rfloor;','rho;',
'rlm;','rsaquo;','rsquo;','sbquo;','scaron;','sdot;','sect;','sect','shy;',
'shy','sigma;','sigmaf;','sim;','spades;','sub;','sube;','sum;','sup1;',
'sup1','sup2;','sup2','sup3;','sup3','sup;','supe;','szlig;','szlig','tau;',
'there4;','theta;','thetasym;','thinsp;','thorn;','thorn','tilde;','times;',
'times','trade;','uArr;','uacute;','uacute','uarr;','ucirc;','ucirc',
'ugrave;','ugrave','uml;','uml','upsih;','upsilon;','uuml;','uuml','weierp;',
'xi;','yacute;','yacute','yen;','yen','yuml;','yuml','zeta;','zwj;','zwnj;')'
array('AElig;','AElig','AMP;','AMP','Aacute;','Aacute',
'Acirc;','Acirc','Agrave;','Agrave','Alpha;','Aring;','Aring','Atilde;',
'Atilde','Auml;','Auml','Beta;','COPY;','COPY','Ccedil;','Ccedil','Chi;',
'Dagger;','Delta;','ETH;','ETH','Eacute;','Eacute','Ecirc;','Ecirc','Egrave;',
'Egrave','Epsilon;','Eta;','Euml;','Euml','GT;','GT','Gamma;','Iacute;',
'Iacute','Icirc;','Icirc','Igrave;','Igrave','Iota;','Iuml;','Iuml','Kappa;',
'LT;','LT','Lambda;','Mu;','Ntilde;','Ntilde','Nu;','OElig;','Oacute;',
'Oacute','Ocirc;','Ocirc','Ograve;','Ograve','Omega;','Omicron;','Oslash;',
'Oslash','Otilde;','Otilde','Ouml;','Ouml','Phi;','Pi;','Prime;','Psi;',
'QUOT;','QUOT','REG;','REG','Rho;','Scaron;','Sigma;','THORN;','THORN',
'TRADE;','Tau;','Theta;','Uacute;','Uacute','Ucirc;','Ucirc','Ugrave;',
'Ugrave','Upsilon;','Uuml;','Uuml','Xi;','Yacute;','Yacute','Yuml;','Zeta;',
'aacute;','aacute','acirc;','acirc','acute;','acute','aelig;','aelig',
'agrave;','agrave','alefsym;','alpha;','amp;','amp','and;','ang;','apos;',
'aring;','aring','asymp;','atilde;','atilde','auml;','auml','bdquo;','beta;',
'brvbar;','brvbar','bull;','cap;','ccedil;','ccedil','cedil;','cedil',
'cent;','cent','chi;','circ;','clubs;','cong;','copy;','copy','crarr;',
'cup;','curren;','curren','dArr;','dagger;','darr;','deg;','deg','delta;',
'diams;','divide;','divide','eacute;','eacute','ecirc;','ecirc','egrave;',
'egrave','empty;','emsp;','ensp;','epsilon;','equiv;','eta;','eth;','eth',
'euml;','euml','euro;','exist;','fnof;','forall;','frac12;','frac12',
'frac14;','frac14','frac34;','frac34','frasl;','gamma;','ge;','gt;','gt',
'hArr;','harr;','hearts;','hellip;','iacute;','iacute','icirc;','icirc',
'iexcl;','iexcl','igrave;','igrave','image;','infin;','int;','iota;',
'iquest;','iquest','isin;','iuml;','iuml','kappa;','lArr;','lambda;','lang;',
'laquo;','laquo','larr;','lceil;','ldquo;','le;','lfloor;','lowast;','loz;',
'lrm;','lsaquo;','lsquo;','lt;','lt','macr;','macr','mdash;','micro;','micro',
'middot;','middot','minus;','mu;','nabla;','nbsp;','nbsp','ndash;','ne;',
'ni;','not;','not','notin;','nsub;','ntilde;','ntilde','nu;','oacute;',
'oacute','ocirc;','ocirc','oelig;','ograve;','ograve','oline;','omega;',
'omicron;','oplus;','or;','ordf;','ordf','ordm;','ordm','oslash;','oslash',
'otilde;','otilde','otimes;','ouml;','ouml','para;','para','part;','permil;',
'perp;','phi;','pi;','piv;','plusmn;','plusmn','pound;','pound','prime;',
'prod;','prop;','psi;','quot;','quot','rArr;','radic;','rang;','raquo;',
'raquo','rarr;','rceil;','rdquo;','real;','reg;','reg','rfloor;','rho;',
'rlm;','rsaquo;','rsquo;','sbquo;','scaron;','sdot;','sect;','sect','shy;',
'shy','sigma;','sigmaf;','sim;','spades;','sub;','sube;','sum;','sup1;',
'sup1','sup2;','sup2','sup3;','sup3','sup;','supe;','szlig;','szlig','tau;',
'there4;','theta;','thetasym;','thinsp;','thorn;','thorn','tilde;','times;',
'times','trade;','uArr;','uacute;','uacute','uarr;','ucirc;','ucirc',
'ugrave;','ugrave','uml;','uml','upsih;','upsilon;','uuml;','uuml','weierp;',
'xi;','yacute;','yacute','yen;','yen','yuml;','yuml','zeta;','zwj;','zwnj;')Details- Type
- n/a
Methods
\HTML5TreeConstructer
Constants
Properties

$formatting= 'array('a','b','big','em','font','i','nobr','s','small','strike','strong','tt','u')'
array('a','b','big','em','font','i','nobr','s','small','strike','strong','tt','u')Details- Type
- n/a

$scoping= 'array('button','caption','html','marquee','object','table','td','th')'
array('button','caption','html','marquee','object','table','td','th')Details- Type
- n/a

$special= 'array('address','area','base','basefont','bgsound',
'blockquote','body','br','center','col','colgroup','dd','dir','div','dl',
'dt','embed','fieldset','form','frame','frameset','h1','h2','h3','h4','h5',
'h6','head','hr','iframe','image','img','input','isindex','li','link',
'listing','menu','meta','noembed','noframes','noscript','ol','optgroup',
'option','p','param','plaintext','pre','script','select','spacer','style',
'tbody','textarea','tfoot','thead','title','tr','ul','wbr')'
array('address','area','base','basefont','bgsound',
'blockquote','body','br','center','col','colgroup','dd','dir','div','dl',
'dt','embed','fieldset','form','frame','frameset','h1','h2','h3','h4','h5',
'h6','head','hr','iframe','image','img','input','isindex','li','link',
'listing','menu','meta','noembed','noframes','noscript','ol','optgroup',
'option','p','param','plaintext','pre','script','select','spacer','style',
'tbody','textarea','tfoot','thead','title','tr','ul','wbr')Details- Type
- n/a
Methods
\HTMLPurifier_Lexer_PH5P
Experimental HTML5-based parser using Jeroen van der Meer's PH5P library.
Occupies space in the HTML5 pseudo-namespace, which may cause conflicts.
- Parent(s)
- \HTMLPurifier_Lexer_DOMLex < \HTMLPurifier_Lexer
- Note
- Recent changes to PHP's DOM extension have resulted in some fatal error conditions with the original version of PH5P. Pending changes, this lexer will punt to DirectLex if DOM throws an exception.
Properties

$_special_entity2str= 'array(
'&quot;' => '"',
'&amp;' => '&',
'&lt;' => '<',
'&gt;' => '>',
'&#39;' => "'",
'&#039;' => "'",
'&#x27;' => "'"
)'
Most common entity to raw value conversion table for special entities.
Inherited from: \HTMLPurifier_Lexer::$$_special_entity2str\HTMLPurifier_Lexer_DOMLex::$$_special_entity2strarray(
'&quot;' => '"',
'&amp;' => '&',
'&lt;' => '<',
'&gt;' => '>',
'&#39;' => "'",
'&#039;' => "'",
'&#x27;' => "'"
)Details- Type
- n/a
- Inherited_from
- \HTMLPurifier_Lexer::$$_special_entity2str
- Inherited_from
- \HTMLPurifier_Lexer_DOMLex::$$_special_entity2str

$factory= ''
- Type
- n/a
- Inherited_from
- \HTMLPurifier_Lexer_DOMLex::$$factory

$tracksLineNumbers= 'false'
Whether or not this lexer implements line-number/column-number tracking.
Inherited from: \HTMLPurifier_Lexer::$$tracksLineNumbers\HTMLPurifier_Lexer_DOMLex::$$tracksLineNumbersIf it does, set to true.
falseDetails- Type
- n/a
- Inherited_from
- \HTMLPurifier_Lexer::$$tracksLineNumbers
- Inherited_from
- \HTMLPurifier_Lexer_DOMLex::$$tracksLineNumbers
Methods

CDATACallback(
$matches
)
:
voidCallback function for escapeCDATA() that does the work.
Inherited from: \HTMLPurifier_Lexer::CDATACallback()\HTMLPurifier_Lexer_DOMLex::CDATACallback()| Name | Type | Description |
|---|---|---|
| $matches |
- Params
- $matches PCRE matches array, with index 0 the entire match and 1 the inside of the CDATA section.
- Returns
- Escaped internals of the CDATA section.
- Warning
- Though this is public in order to let the callback happen, calling it directly is not recommended.

callbackArmorCommentEntities(
$matches
)
:
voidCallback function that entity-izes ampersands in comments so that callbackUndoCommentSubst doesn't clobber them
Inherited from: \HTMLPurifier_Lexer_DOMLex::callbackArmorCommentEntities()| Name | Type | Description |
|---|---|---|
| $matches |

callbackUndoCommentSubst(
$matches
)
:
voidCallback function for undoing escaping of stray angled brackets in comments
Inherited from: \HTMLPurifier_Lexer_DOMLex::callbackUndoCommentSubst()| Name | Type | Description |
|---|---|---|
| $matches |

create(
\$config $config
)
:
\ConcreteRetrieves or sets the default Lexer as a Prototype Factory.
Inherited from: \HTMLPurifier_Lexer::create()\HTMLPurifier_Lexer_DOMLex::create()By default HTMLPurifier_Lexer_DOMLex will be returned. There are a few exceptions involving special features that only DirectLex implements.
| Name | Type | Description |
|---|---|---|
| $config | \$config | Instance of HTMLPurifier_Config |
| Type | Description |
|---|---|
| \Concrete | lexer. |
- Note
- The behavior of this class has changed: rather than accepting a prototype object, it now accepts a configuration object. To specify your own prototype, set %Core.LexerImpl to it. This change in behavior de-singletonizes the lexer object.

createEndNode(
$node, $tokens
)
:
void| Name | Type | Description |
|---|---|---|
| $node | ||
| $tokens |

createStartNode(
\$node $node, \$tokens $tokens, \$collect $collect
)
:
void| Name | Type | Description |
|---|---|---|
| $node | \$node | DOMNode to be tokenized. |
| $tokens | \$tokens | Array-list of already tokenized tokens. |
| $collect | \$collect | Says whether or not start and close are collected, set to false at first recursion because it's the implicit DIV tag you're dealing with. |
- Returns
- bool if the token needs an endtoken

escapeCDATA(
\$string $string
)
:
voidTranslates CDATA sections into regular sections (through escaping).
Inherited from: \HTMLPurifier_Lexer::escapeCDATA()\HTMLPurifier_Lexer_DOMLex::escapeCDATA()| Name | Type | Description |
|---|---|---|
| $string | \$string | HTML string to process. |
- Returns
- HTML with CDATA sections escaped.

escapeCommentedCDATA(
$string
)
:
voidSpecial CDATA case that is especially convoluted for <script>
Inherited from: \HTMLPurifier_Lexer::escapeCommentedCDATA()\HTMLPurifier_Lexer_DOMLex::escapeCommentedCDATA()| Name | Type | Description |
|---|---|---|
| $string |

extractBody(
$html
)
:
voidTakes a string of HTML (fragment or document) and returns the content
Inherited from: \HTMLPurifier_Lexer::extractBody()\HTMLPurifier_Lexer_DOMLex::extractBody()| Name | Type | Description |
|---|---|---|
| $html |

muteErrorHandler(
$errno, $errstr
)
:
voidAn error handler that mutes all errors
Inherited from: \HTMLPurifier_Lexer_DOMLex::muteErrorHandler()| Name | Type | Description |
|---|---|---|
| $errno | ||
| $errstr |

normalize(
$html, $config, $context
)
:
voidTakes a piece of HTML and normalizes it by converting entities, fixing encoding, extracting bits, and other good stuff.
Inherited from: \HTMLPurifier_Lexer::normalize()\HTMLPurifier_Lexer_DOMLex::normalize()| Name | Type | Description |
|---|---|---|
| $html | ||
| $config | ||
| $context |

parseData(
\$string $string
)
:
voidParses special entities into the proper characters.
Inherited from: \HTMLPurifier_Lexer::parseData()\HTMLPurifier_Lexer_DOMLex::parseData()This string will translate escaped versions of the special characters into the correct ones.
| Name | Type | Description |
|---|---|---|
| $string | \$string | String character data to be parsed. |
- Returns
- Parsed character data.
- Warning
- You should be able to treat the output of this function as completely parsed, but that's only because all other entities should have been handled previously in substituteNonSpecialEntities()

removeIEConditional(
$string
)
:
voidSpecial Internet Explorer conditional comments should be removed.
Inherited from: \HTMLPurifier_Lexer::removeIEConditional()\HTMLPurifier_Lexer_DOMLex::removeIEConditional()| Name | Type | Description |
|---|---|---|
| $string |

tokenizeDOM(
\$node $node, \$tokens $tokens
)
:
voidIterative function that tokenizes a node, putting it into an accumulator.
Inherited from: \HTMLPurifier_Lexer_DOMLex::tokenizeDOM()To iterate is human, to recurse divine - L. Peter Deutsch
| Name | Type | Description |
|---|---|---|
| $node | \$node | DOMNode to be tokenized. |
| $tokens | \$tokens | Array-list of already tokenized tokens. |
- Returns
- Tokens of node appended to previously passed tokens.

tokenizeHTML(
$html, $config, $context
)
:
\HTMLPurifier_TokenLexes an HTML string into tokens.
| Name | Type | Description |
|---|---|---|
| $html | ||
| $config | ||
| $context |
| Type | Description |
|---|---|
| \HTMLPurifier_Token | array representation of HTML. |

transformAttrToAssoc(
\$attribute_list $node_map
)
:
voidConverts a DOMNamedNodeMap of DOMAttr objects into an assoc array.
Inherited from: \HTMLPurifier_Lexer_DOMLex::transformAttrToAssoc()| Name | Type | Description |
|---|---|---|
| $node_map | \$attribute_list | DOMNamedNodeMap of DOMAttr objects. |
- Returns
- Associative array of attributes.

wrapHTML(
$html, $config, $context
)
:
voidWraps an HTML fragment in the necessary HTML
Inherited from: \HTMLPurifier_Lexer_DOMLex::wrapHTML()| Name | Type | Description |
|---|---|---|
| $html | ||
| $config | ||
| $context |