vichan/inc/lib/htmlpurifier-4.6.0/tests/HTMLPurifier/LexerTest.php

<?php

/**
 * Tests for HTMLPurifier_Lexer and the concrete lexers exercised below.
 *
 * Covers the HTMLPurifier_Lexer::create() factory, the parseData() and
 * extractBody() helpers, and tokenizeHTML() as implemented by the DirectLex,
 * DOMLex and PH5P lexers.
 */
class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
{

    /**
     * Not referenced anywhere inside this class.
     * NOTE(review): presumably read by the harness or kept for legacy
     * reasons -- confirm before removing.
     */
    protected $_has_pear = false;

    /**
     * Loads the optional PH5P lexer when the test configuration enables it.
     */
    public function __construct()
    {
        parent::__construct();
        // !empty() keeps the original truthiness check but does not raise a
        // notice when the configuration key has not been set at all.
        if (!empty($GLOBALS['HTMLPurifierTest']['PH5P'])) {
            require_once 'HTMLPurifier/Lexer/PH5P.php';
        }
    }

    // HTMLPurifier_Lexer::create() --------------------------------------------

    /**
     * With Core.MaintainLineNumbers enabled, create() must return DirectLex.
     */
    public function test_create()
    {
        $this->config->set('Core.MaintainLineNumbers', true);
        $lexer = HTMLPurifier_Lexer::create($this->config);
        $this->assertIsA($lexer, 'HTMLPurifier_Lexer_DirectLex');
    }

    /**
     * Core.LexerImpl may be given a lexer object instead of a name.
     */
    public function test_create_objectLexerImpl()
    {
        $this->config->set('Core.LexerImpl', new HTMLPurifier_Lexer_DirectLex());
        $lexer = HTMLPurifier_Lexer::create($this->config);
        $this->assertIsA($lexer, 'HTMLPurifier_Lexer_DirectLex');
    }

    /**
     * An unrecognized lexer name must raise an HTMLPurifier_Exception.
     */
    public function test_create_unknownLexer()
    {
        $this->config->set('Core.LexerImpl', 'AsdfAsdf');
        $this->expectException(new HTMLPurifier_Exception('Cannot instantiate unrecognized Lexer type AsdfAsdf'));
        HTMLPurifier_Lexer::create($this->config);
    }

    /**
     * Requesting DOMLex together with Core.MaintainLineNumbers must raise an
     * HTMLPurifier_Exception.
     */
    public function test_create_incompatibleLexer()
    {
        $this->config->set('Core.LexerImpl', 'DOMLex');
        $this->config->set('Core.MaintainLineNumbers', true);
        $this->expectException(new HTMLPurifier_Exception('Cannot use lexer that does not support line numbers with Core.MaintainLineNumbers or Core.CollectErrors (use DirectLex instead)'));
        HTMLPurifier_Lexer::create($this->config);
    }

    // HTMLPurifier_Lexer->parseData() -----------------------------------------

    /**
     * Asserts that parseData() turns $input into $expect.
     *
     * @param string      $input  Raw data handed to parseData().
     * @param string|bool $expect Expected result; true (the default) means
     *                            "identical to $input".
     */
    public function assertParseData($input, $expect = true)
    {
        if ($expect === true) {
            $expect = $input;
        }
        $lexer = new HTMLPurifier_Lexer();
        $this->assertIdentical($expect, $lexer->parseData($input));
    }

    /** Plain text is expected to come back unchanged. */
    public function test_parseData_plainText()
    {
        $this->assertParseData('asdf');
    }

    /** &amp; is expected to decode to a literal ampersand. */
    public function test_parseData_ampersandEntity()
    {
        $this->assertParseData('&amp;', '&');
    }

    /** &quot; is expected to decode to a double quote. */
    public function test_parseData_quotEntity()
    {
        $this->assertParseData('&quot;', '"');
    }

    /** The zero-padded numeric entity &#039; is expected to decode to an apostrophe. */
    public function test_parseData_aposNumericEntity()
    {
        $this->assertParseData('&#039;', "'");
    }

    /** The unpadded numeric entity &#39; is expected to decode to an apostrophe. */
    public function test_parseData_aposCompactNumericEntity()
    {
        $this->assertParseData('&#39;', "'");
    }

    /** Several adjacent &amp; entities are each expected to be decoded. */
    public function test_parseData_adjacentAmpersandEntities()
    {
        $this->assertParseData('&amp;&amp;&amp;', '&&&');
    }

    /** A bare ampersand following an entity is expected to be kept as-is. */
    public function test_parseData_trailingUnescapedAmpersand()
    {
        $this->assertParseData('&amp;&', '&&');
    }

    /** A bare ampersand inside text is expected to be kept as-is. */
    public function test_parseData_internalUnescapedAmpersand()
    {
        $this->assertParseData('Procter & Gamble');
    }

    /** The input '&#x2D;' is expected to come back unchanged. */
    public function test_parseData_improperEntityFaultToleranceTest()
    {
        $this->assertParseData('&#x2D;');
    }

    // HTMLPurifier_Lexer->extractBody() ---------------------------------------

    /**
     * Asserts that extractBody() reduces $text to $extract.
     *
     * @param string      $text    HTML handed to extractBody().
     * @param string|bool $extract Expected result; true (the default) means
     *                             "identical to $text".
     */
    public function assertExtractBody($text, $extract = true)
    {
        $lexer = new HTMLPurifier_Lexer();
        $result = $lexer->extractBody($text);
        if ($extract === true) {
            $extract = $text;
        }
        $this->assertIdentical($extract, $result);
    }

    /** Input without body tags is expected to come back unchanged. */
    public function test_extractBody_noBodyTags()
    {
        $this->assertExtractBody('<b>Bold</b>');
    }

    /** Lowercase html/body wrappers are expected to be stripped. */
    public function test_extractBody_lowercaseBodyTags()
    {
        $this->assertExtractBody('<html><body><b>Bold</b></body></html>', '<b>Bold</b>');
    }

    /** Uppercase HTML/BODY wrappers are expected to be stripped. */
    public function test_extractBody_uppercaseBodyTags()
    {
        $this->assertExtractBody('<HTML><BODY><B>Bold</B></BODY></HTML>', '<B>Bold</B>');
    }

    /**
     * A full XHTML document is expected to be reduced to the contents of its
     * body element, surrounding whitespace included.
     */
    public function test_extractBody_realisticUseCase()
    {
        $this->assertExtractBody(
'<?xml version="1.0"
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
   <head>
      <title>xyz</title>
   </head>
   <body>
      <form method="post" action="whatever1">
         <div>
            <input type="text" name="username" />
            <input type="text" name="password" />
            <input type="submit" />
         </div>
      </form>
   </body>
</html>',
    '
      <form method="post" action="whatever1">
         <div>
            <input type="text" name="username" />
            <input type="text" name="password" />
            <input type="submit" />
         </div>
      </form>
   ');
    }

    /** A body tag carrying attributes is expected to be stripped as well. */
    public function test_extractBody_bodyWithAttributes()
    {
        $this->assertExtractBody('<html><body bgcolor="#F00"><b>Bold</b></body></html>', '<b>Bold</b>');
    }

    /** An unclosed body tag is expected to leave the input untouched. */
    public function test_extractBody_preserveUnclosedBody()
    {
        $this->assertExtractBody('<body>asdf'); // not closed, don't accept
    }

    /** With several closing body tags, the last one is expected to delimit the body. */
    public function test_extractBody_useLastBody()
    {
        $this->assertExtractBody('<body>foo</body>bar</body>', 'foo</body>bar');
    }

    // HTMLPurifier_Lexer->tokenizeHTML() --------------------------------------

    /**
     * Tokenizes $input with every available lexer and checks the result.
     *
     * DirectLex is always exercised. DOMLex and PH5P are exercised only when
     * DOMDocument exists; PH5P additionally requires that its class can be
     * loaded, so a missing optional lexer is skipped instead of aborting the
     * whole run with a fatal error.
     *
     * @param string $input      HTML to tokenize.
     * @param array  $expect     Tokens expected from every lexer that has no
     *                           entry in $alt_expect.
     * @param array  $alt_expect Map of lexer name => alternative expected
     *                           tokens, or false to skip the assertion for
     *                           that lexer (it is still run on the input).
     */
    public function assertTokenization($input, $expect, $alt_expect = array())
    {
        $lexers = array();
        $lexers['DirectLex'] = new HTMLPurifier_Lexer_DirectLex();
        if (class_exists('DOMDocument')) {
            $lexers['DOMLex'] = new HTMLPurifier_Lexer_DOMLex();
            // The constructor only loads PH5P when the test configuration
            // enables it, so make sure the class is actually available.
            if (class_exists('HTMLPurifier_Lexer_PH5P')) {
                $lexers['PH5P'] = new HTMLPurifier_Lexer_PH5P();
            }
        }
        foreach ($lexers as $name => $lexer) {
            // Tokenize before looking at $alt_expect so that even lexers
            // whose output is not asserted are still run against the input.
            $result = $lexer->tokenizeHTML($input, $this->config, $this->context);
            if (isset($alt_expect[$name])) {
                if ($alt_expect[$name] === false) {
                    continue;
                }
                $t_expect = $alt_expect[$name];
            } else {
                $t_expect = $expect;
            }
            // Expected value first, matching the other assert helpers here.
            $this->assertIdentical($t_expect, $result, "$name: %s");
            if ($t_expect != $result) {
                // Dump the actual tokens to ease debugging of a mismatch.
                printTokens($result);
            }
        }
    }

    /** Empty input is expected to yield no tokens. */
    public function test_tokenizeHTML_emptyInput()
    {
        $this->assertTokenization('', array());
    }

    /** Text without markup is expected to yield a single text token. */
    public function test_tokenizeHTML_plainText()
    {
        $this->assertTokenization(
            'This is regular text.',
            array(
                new HTMLPurifier_Token_Text('This is regular text.')
            )
        );
    }

    /** Text interleaved with a simple element. */
    public function test_tokenizeHTML_textAndTags()
    {
        $this->assertTokenization(
            'This is <b>bold</b> text',
            array(
                new HTMLPurifier_Token_Text('This is '),
                new HTMLPurifier_Token_Start('b', array()),
                new HTMLPurifier_Token_Text('bold'),
                new HTMLPurifier_Token_End('b'),
                new HTMLPurifier_Token_Text(' text'),
            )
        );
    }

    /** Mixed-case opening and closing tag names for the same element. */
    public function test_tokenizeHTML_normalizeCase()
    {
        $this->assertTokenization(
            '<DIV>Totally rad dude. <b>asdf</b></div>',
            array(
                new HTMLPurifier_Token_Start('DIV', array()),
                new HTMLPurifier_Token_Text('Totally rad dude. '),
                new HTMLPurifier_Token_Start('b', array()),
                new HTMLPurifier_Token_Text('asdf'),
                new HTMLPurifier_Token_End('b'),
                new HTMLPurifier_Token_End('div'),
            )
        );
    }

    /**
     * Badly nested markup; DOMLex and PH5P are expected to produce a
     * repaired token stream that differs from the default expectation.
     */
    public function test_tokenizeHTML_notWellFormed()
    {
        $this->assertTokenization(
            '<asdf></asdf><d></d><poOloka><poolasdf><ds></asdf></ASDF>',
            array(
                new HTMLPurifier_Token_Start('asdf'),
                new HTMLPurifier_Token_End('asdf'),
                new HTMLPurifier_Token_Start('d'),
                new HTMLPurifier_Token_End('d'),
                new HTMLPurifier_Token_Start('poOloka'),
                new HTMLPurifier_Token_Start('poolasdf'),
                new HTMLPurifier_Token_Start('ds'),
                new HTMLPurifier_Token_End('asdf'),
                new HTMLPurifier_Token_End('ASDF'),
            ),
            array(
                'DOMLex' => $alt = array(
                    new HTMLPurifier_Token_Empty('asdf'),
                    new HTMLPurifier_Token_Empty('d'),
                    new HTMLPurifier_Token_Start('pooloka'),
                    new HTMLPurifier_Token_Start('poolasdf'),
                    new HTMLPurifier_Token_Empty('ds'),
                    new HTMLPurifier_Token_End('poolasdf'),
                    new HTMLPurifier_Token_End('pooloka'),
                ),
                'PH5P' => $alt,
            )
        );
    }

    /** Tabs and newlines separating attributes inside a tag. */
    public function test_tokenizeHTML_whitespaceInTag()
    {
        $this->assertTokenization(
            '<a'."\t".'href="foobar.php"'."\n".'title="foo!">Link to <b id="asdf">foobar</b></a>',
            array(
                new HTMLPurifier_Token_Start('a',array('href'=>'foobar.php','title'=>'foo!')),
                new HTMLPurifier_Token_Text('Link to '),
                new HTMLPurifier_Token_Start('b',array('id'=>'asdf')),
                new HTMLPurifier_Token_Text('foobar'),
                new HTMLPurifier_Token_End('b'),
                new HTMLPurifier_Token_End('a'),
            )
        );
    }

    /** An entity inside the sole attribute of an empty tag is expected to be decoded. */
    public function test_tokenizeHTML_singleAttribute()
    {
        $this->assertTokenization(
            '<br style="&amp;" />',
            array(
                new HTMLPurifier_Token_Empty('br', array('style' => '&'))
            )
        );
    }

    /** A self-closing tag is expected to yield one empty token. */
    public function test_tokenizeHTML_emptyTag()
    {
        $this->assertTokenization(
            '<br />',
            array( new HTMLPurifier_Token_Empty('br') )
        );
    }

    /** A well-formed comment is expected to yield one comment token. */
    public function test_tokenizeHTML_comment()
    {
        $this->assertTokenization(
            '<!-- Comment -->',
            array( new HTMLPurifier_Token_Comment(' Comment ') )
        );
    }

    /** An extra dash before the closing marker is expected to stay in the comment data. */
    public function test_tokenizeHTML_malformedComment()
    {
        $this->assertTokenization(
            '<!-- not so well formed --->',
            array( new HTMLPurifier_Token_Comment(' not so well formed -') )
        );
    }

    /** A tag that is never closed with '>'. */
    public function test_tokenizeHTML_unterminatedTag()
    {
        $this->assertTokenization(
            '<a href=""',
            array( new HTMLPurifier_Token_Text('<a href=""') ),
            array(
                // I like our behavior better, but it's non-standard
                'DOMLex'   => array( new HTMLPurifier_Token_Empty('a', array('href'=>'')) ),
                'PH5P' => false, // total barfing, grabs scaffolding too
            )
        );
    }

    /** &lt; and &gt; in text are expected to decode without creating a tag. */
    public function test_tokenizeHTML_specialEntities()
    {
        $this->assertTokenization(
            '&lt;b&gt;',
            array(
                new HTMLPurifier_Token_Text('<b>')
            ),
            array(
                // some parsers will separate entities out
                'PH5P' => array(
                    new HTMLPurifier_Token_Text('<'),
                    new HTMLPurifier_Token_Text('b'),
                    new HTMLPurifier_Token_Text('>'),
                ),
            )
        );
    }

    /** A stray quote where an attribute name is expected. */
    public function test_tokenizeHTML_earlyQuote()
    {
        $this->assertTokenization(
            '<a "=>',
            array( new HTMLPurifier_Token_Empty('a') ),
            array(
                // we barf on this input
                'DirectLex' => array(
                    new HTMLPurifier_Token_Start('a', array('"' => ''))
                ),
                'PH5P' => false, // behavior varies; handle this personally
            )
        );
    }

    /**
     * PH5P-specific variant of the early-quote test.
     *
     * The expected token type depends on whether the context reports a
     * truthy 'PH5PError' entry after tokenizing. Skipped when DOMDocument or
     * the PH5P lexer class is unavailable.
     */
    public function test_tokenizeHTML_earlyQuote_PH5P()
    {
        if (!class_exists('DOMDocument') || !class_exists('HTMLPurifier_Lexer_PH5P')) {
            return;
        }
        $lexer = new HTMLPurifier_Lexer_PH5P();
        $result = $lexer->tokenizeHTML('<a "=>', $this->config, $this->context);
        if ($this->context->get('PH5PError', true)) {
            $this->assertIdentical(array(
                new HTMLPurifier_Token_Start('a', array('"' => ''))
            ), $result);
        } else {
            $this->assertIdentical(array(
                new HTMLPurifier_Token_Empty('a', array('"' => ''))
            ), $result);
        }
    }

    /** A literal double quote in text is expected to be kept. */
    public function test_tokenizeHTML_unescapedQuote()
    {
        $this->assertTokenization(
            '"',
            array( new HTMLPurifier_Token_Text('"') )
        );
    }

    /** &quot; in text is expected to decode to a double quote. */
    public function test_tokenizeHTML_escapedQuote()
    {
        $this->assertTokenization(
            '&quot;',
            array( new HTMLPurifier_Token_Text('"') )
        );
    }

    /** CDATA contents are expected to become literal text, markup and entities included. */
    public function test_tokenizeHTML_cdata()
    {
        $this->assertTokenization(
            '<![CDATA[You <b>can&#39;t</b> get me!]]>',
            array( new HTMLPurifier_Token_Text('You <b>can&#39;t</b> get me!') ),
            array(
                'PH5P' =>  array(
                    new HTMLPurifier_Token_Text('You '),
                    new HTMLPurifier_Token_Text('<'),
                    new HTMLPurifier_Token_Text('b'),
                    new HTMLPurifier_Token_Text('>'),
                    new HTMLPurifier_Token_Text('can'),
                    new HTMLPurifier_Token_Text('&'),
                    new HTMLPurifier_Token_Text('#39;t'),
                    new HTMLPurifier_Token_Text('<'),
                    new HTMLPurifier_Token_Text('/b'),
                    new HTMLPurifier_Token_Text('>'),
                    new HTMLPurifier_Token_Text(' get me!'),
                ),
            )
        );
    }

    /** The named entity &theta; is expected to decode to the bytes CE B8. */
    public function test_tokenizeHTML_characterEntity()
    {
        $this->assertTokenization(
            '&theta;',
            array( new HTMLPurifier_Token_Text("\xCE\xB8") )
        );
    }

    /** A named entity inside CDATA is expected to stay undecoded. */
    public function test_tokenizeHTML_characterEntityInCDATA()
    {
        $this->assertTokenization(
            '<![CDATA[&rarr;]]>',
            array( new HTMLPurifier_Token_Text("&rarr;") ),
            array(
                'PH5P' => array(
                    new HTMLPurifier_Token_Text('&'),
                    new HTMLPurifier_Token_Text('rarr;'),
                ),
            )
        );
    }

    /** &amp; inside an attribute value is expected to be decoded. */
    public function test_tokenizeHTML_entityInAttribute()
    {
        $this->assertTokenization(
            '<a href="index.php?title=foo&amp;id=bar">Link</a>',
            array(
                new HTMLPurifier_Token_Start('a',array('href' => 'index.php?title=foo&id=bar')),
                new HTMLPurifier_Token_Text('Link'),
                new HTMLPurifier_Token_End('a'),
            )
        );
    }

    /** A raw multi-byte sequence is expected to pass through unchanged. */
    public function test_tokenizeHTML_preserveUTF8()
    {
        $this->assertTokenization(
            "\xCE\xB8",
            array( new HTMLPurifier_Token_Text("\xCE\xB8") )
        );
    }

    /** &lt; inside an attribute value is expected to be decoded. */
    public function test_tokenizeHTML_specialEntityInAttribute()
    {
        $this->assertTokenization(
            '<br test="x &lt; 6" />',
            array( new HTMLPurifier_Token_Empty('br', array('test' => 'x < 6')) )
        );
    }

    /** Stray '<' and '>' characters in text must not be treated as tags. */
    public function test_tokenizeHTML_emoticonProtection()
    {
        $this->assertTokenization(
            '<b>Whoa! <3 That\'s not good >.></b>',
            array(
                new HTMLPurifier_Token_Start('b'),
                new HTMLPurifier_Token_Text('Whoa! '),
                new HTMLPurifier_Token_Text('<'),
                new HTMLPurifier_Token_Text('3 That\'s not good >.>'),
                new HTMLPurifier_Token_End('b')
            ),
            array(
                // text is absorbed together
                'DOMLex' => array(
                    new HTMLPurifier_Token_Start('b'),
                    new HTMLPurifier_Token_Text('Whoa! <3 That\'s not good >.>'),
                    new HTMLPurifier_Token_End('b'),
                ),
                'PH5P' => array( // interesting grouping
                    new HTMLPurifier_Token_Start('b'),
                    new HTMLPurifier_Token_Text('Whoa! '),
                    new HTMLPurifier_Token_Text('<'),
                    new HTMLPurifier_Token_Text('3 That\'s not good >.>'),
                    new HTMLPurifier_Token_End('b'),
                ),
            )
        );
    }

    /** Angle brackets inside a comment must not end it early. */
    public function test_tokenizeHTML_commentWithFunkyChars()
    {
        $this->assertTokenization(
            '<!-- This >< comment --><br />',
            array(
                new HTMLPurifier_Token_Comment(' This >< comment '),
                new HTMLPurifier_Token_Empty('br'),
            )
        );
    }

    /** A comment that is never closed; only DirectLex output is asserted. */
    public function test_tokenizeHTML_unterminatedComment()
    {
        $this->assertTokenization(
            '<!-- This >< comment',
            array( new HTMLPurifier_Token_Comment(' This >< comment') ),
            array(
                'DOMLex'   => false,
                'PH5P'     => false,
            )
        );
    }

    /** With HTML.Trusted set, script contents are expected to stay one text token. */
    public function test_tokenizeHTML_scriptCDATAContents()
    {
        $this->config->set('HTML.Trusted', true);
        $this->assertTokenization(
            'Foo: <script>alert("<foo>");</script>',
            array(
                new HTMLPurifier_Token_Text('Foo: '),
                new HTMLPurifier_Token_Start('script'),
                new HTMLPurifier_Token_Text('alert("<foo>");'),
                new HTMLPurifier_Token_End('script'),
            ),
            array(
                // PH5P, for some reason, bubbles the script to <head>
                'PH5P' => false,
            )
        );
    }

    /** Entities inside a comment are expected to stay undecoded. */
    public function test_tokenizeHTML_entitiesInComment()
    {
        $this->assertTokenization(
            '<!-- This comment < &lt; & -->',
            array( new HTMLPurifier_Token_Comment(' This comment < &lt; & ') )
        );
    }

    /** Angle brackets inside a quoted attribute value; DirectLex differs. */
    public function test_tokenizeHTML_attributeWithSpecialCharacters()
    {
        $this->assertTokenization(
            '<a href="><>">',
            array( new HTMLPurifier_Token_Empty('a', array('href' => '><>')) ),
            array(
                'DirectLex' => array(
                    new HTMLPurifier_Token_Start('a', array('href' => '')),
                    new HTMLPurifier_Token_Text('<'),
                    new HTMLPurifier_Token_Text('">'),
                )
            )
        );
    }

    /** Slashes inside an attribute value must not be mistaken for the self-closing slash. */
    public function test_tokenizeHTML_emptyTagWithSlashInAttribute()
    {
        $this->assertTokenization(
            '<param name="src" value="http://example.com/video.wmv" />',
            array( new HTMLPurifier_Token_Empty('param', array('name' => 'src', 'value' => 'http://example.com/video.wmv')) )
        );
    }

    /**
     * Style element whose contents are wrapped in an HTML comment.
     *
     * The expectation for DOMLex depends on the libxml version; the test is
     * skipped entirely when LIBXML_VERSION is not defined.
     */
    public function test_tokenizeHTML_style()
    {
        $extra = array(
            // PH5P doesn't seem to like style tags
            'PH5P' => false,
            // DirectLex defers to RemoveForeignElements for textification
            'DirectLex' => array(
                new HTMLPurifier_Token_Start('style', array('type' => 'text/css')),
                new HTMLPurifier_Token_Comment("\ndiv {}\n"),
                new HTMLPurifier_Token_End('style'),
            ),
        );
        if (!defined('LIBXML_VERSION')) {
            // LIBXML_VERSION is missing in early versions of PHP
            // prior to 1.30 of php-src/ext/libxml/libxml.c (version-wise,
            // this translates to 5.0.x). In such cases, punt the test entirely.
            return;
        } elseif (LIBXML_VERSION < 20628) {
            // libxml's behavior is wrong prior to this version, so make
            // appropriate accommodations
            $extra['DOMLex'] = $extra['DirectLex'];
        }
        $this->assertTokenization(
'<style type="text/css"><!--
div {}
--></style>',
            array(
                new HTMLPurifier_Token_Start('style', array('type' => 'text/css')),
                new HTMLPurifier_Token_Text("\ndiv {}\n"),
                new HTMLPurifier_Token_End('style'),
            ),
            $extra
        );
    }

    /** Tag name followed by '@' and a surplus '>'. */
    public function test_tokenizeHTML_tagWithAtSignAndExtraGt()
    {
        $alt_expect = array(
            // Technically this is invalid, but it won't be a
            // problem with invalid element removal; also, this
            // mimics Mozilla's parsing of the tag.
            new HTMLPurifier_Token_Start('a@'),
            new HTMLPurifier_Token_Text('>'),
        );
        $this->assertTokenization(
            '<a@>>',
            array(
                new HTMLPurifier_Token_Start('a'),
                new HTMLPurifier_Token_Text('>'),
                new HTMLPurifier_Token_End('a'),
            ),
            array(
                'DirectLex' => $alt_expect,
            )
        );
    }

    /** A "<3" emoticon between two empty tags. */
    public function test_tokenizeHTML_emoticonHeart()
    {
        $this->assertTokenization(
            '<br /><3<br />',
            array(
                new HTMLPurifier_Token_Empty('br'),
                new HTMLPurifier_Token_Text('<'),
                new HTMLPurifier_Token_Text('3'),
                new HTMLPurifier_Token_Empty('br'),
            ),
            array(
                'DOMLex' => array(
                    new HTMLPurifier_Token_Empty('br'),
                    new HTMLPurifier_Token_Text('<3'),
                    new HTMLPurifier_Token_Empty('br'),
                ),
            )
        );
    }

    /** Two consecutive '<' characters inside an element. */
    public function test_tokenizeHTML_emoticonShiftyEyes()
    {
        $this->assertTokenization(
            '<b><<</b>',
            array(
                new HTMLPurifier_Token_Start('b'),
                new HTMLPurifier_Token_Text('<'),
                new HTMLPurifier_Token_Text('<'),
                new HTMLPurifier_Token_End('b'),
            ),
            array(
                'DOMLex' => array(
                    new HTMLPurifier_Token_Start('b'),
                    new HTMLPurifier_Token_Text('<<'),
                    new HTMLPurifier_Token_End('b'),
                ),
            )
        );
    }

    /** A '<' followed by a space and then a real tag. */
    public function test_tokenizeHTML_eon1996()
    {
        $this->assertTokenization(
            '< <b>test</b>',
            array(
                new HTMLPurifier_Token_Text('<'),
                new HTMLPurifier_Token_Text(' '),
                new HTMLPurifier_Token_Start('b'),
                new HTMLPurifier_Token_Text('test'),
                new HTMLPurifier_Token_End('b'),
            ),
            array(
                'DOMLex' => array(
                    new HTMLPurifier_Token_Text('< '),
                    new HTMLPurifier_Token_Start('b'),
                    new HTMLPurifier_Token_Text('test'),
                    new HTMLPurifier_Token_End('b'),
                ),
            )
        );
    }

    /** Body tags inside CDATA are expected to be treated as literal text. */
    public function test_tokenizeHTML_bodyInCDATA()
    {
        $alt_tokens = array(
            new HTMLPurifier_Token_Text('<'),
            new HTMLPurifier_Token_Text('body'),
            new HTMLPurifier_Token_Text('>'),
            new HTMLPurifier_Token_Text('Foo'),
            new HTMLPurifier_Token_Text('<'),
            new HTMLPurifier_Token_Text('/body'),
            new HTMLPurifier_Token_Text('>'),
        );
        $this->assertTokenization(
            '<![CDATA[<body>Foo</body>]]>',
            array(
                new HTMLPurifier_Token_Text('<body>Foo</body>'),
            ),
            array(
                'PH5P' => $alt_tokens,
            )
        );
    }

    /** A self-closing tag nested directly inside a start/end pair. */
    public function test_tokenizeHTML_()
    {
        $this->assertTokenization(
            '<a><img /></a>',
            array(
                new HTMLPurifier_Token_Start('a'),
                new HTMLPurifier_Token_Empty('img'),
                new HTMLPurifier_Token_End('a'),
            )
        );
    }

    /** An IE conditional comment is expected to yield no tokens at all. */
    public function test_tokenizeHTML_ignoreIECondComment()
    {
        $this->assertTokenization(
            '<!--[if IE]>foo<a>bar<!-- baz --><![endif]-->',
            array()
        );
    }

    /** With Core.RemoveProcessingInstructions set, a PI is expected to yield no tokens. */
    public function test_tokenizeHTML_removeProcessingInstruction()
    {
        $this->config->set('Core.RemoveProcessingInstructions', true);
        $this->assertTokenization(
            '<?xml blah blah ?>',
            array()
        );
    }

    /** With Core.NormalizeNewlines on, \r and \r\n are expected to become \n. */
    public function test_tokenizeHTML_removeNewline()
    {
        $this->config->set('Core.NormalizeNewlines', true);
        $this->assertTokenization(
            "plain\rtext\r\n",
            array(
                new HTMLPurifier_Token_Text("plain\ntext\n")
            )
        );
    }

    /** With Core.NormalizeNewlines off, line endings are expected to be preserved. */
    public function test_tokenizeHTML_noRemoveNewline()
    {
        $this->config->set('Core.NormalizeNewlines', false);
        $this->assertTokenization(
            "plain\rtext\r\n",
            array(
                new HTMLPurifier_Token_Text("plain\rtext\r\n")
            )
        );
    }

    /** Text between two conditional comments is expected to survive their removal. */
    public function test_tokenizeHTML_conditionalCommentUngreedy()
    {
        $this->assertTokenization(
            '<!--[if gte mso 9]>a<![endif]-->b<!--[if gte mso 9]>c<![endif]-->',
            array(
                new HTMLPurifier_Token_Text("b")
            )
        );
    }

    /**
     * An img tag written without a self-closing slash; DirectLex is expected
     * to report a start token where the other lexers report an empty token.
     */
    public function test_tokenizeHTML_imgTag()
    {
        $start = array(
            new HTMLPurifier_Token_Start('img',
                array(
                    'src' => 'img_11775.jpg',
                    'alt' => '[Img #11775]',
                    'id' => 'EMBEDDED_IMG_11775',
                )
            )
        );
        $this->assertTokenization(
            '<img src="img_11775.jpg" alt="[Img #11775]" id="EMBEDDED_IMG_11775" >',
            array(
                new HTMLPurifier_Token_Empty('img',
                    array(
                        'src' => 'img_11775.jpg',
                        'alt' => '[Img #11775]',
                        'id' => 'EMBEDDED_IMG_11775',
                    )
                )
            ),
            array(
                'DirectLex' => $start,
            )
        );
    }


    /*
    Template for adding a new tokenizeHTML test:

    public function test_tokenizeHTML_()
    {
        $this->assertTokenization(
            ,
            array(

            )
        );
    }
    */

}

// vim: et sw=4 sts=4
Upgrade HTMLPurifier library 4.5->4.6 2015-02-21 08:16:38 +01:00			`<?php`

			`class HTMLPurifier_LexerTest extends HTMLPurifier_Harness`
			`{`

			`protected $_has_pear = false;`

			`public function __construct()`
			`{`
			`parent::__construct();`
			`if ($GLOBALS['HTMLPurifierTest']['PH5P']) {`
			`require_once 'HTMLPurifier/Lexer/PH5P.php';`
			`}`
			`}`

			`// HTMLPurifier_Lexer::create() --------------------------------------------`

			`public function test_create()`
			`{`
			`$this->config->set('Core.MaintainLineNumbers', true);`
			`$lexer = HTMLPurifier_Lexer::create($this->config);`
			`$this->assertIsA($lexer, 'HTMLPurifier_Lexer_DirectLex');`
			`}`

			`public function test_create_objectLexerImpl()`
			`{`
			`$this->config->set('Core.LexerImpl', new HTMLPurifier_Lexer_DirectLex());`
			`$lexer = HTMLPurifier_Lexer::create($this->config);`
			`$this->assertIsA($lexer, 'HTMLPurifier_Lexer_DirectLex');`
			`}`

			`public function test_create_unknownLexer()`
			`{`
			`$this->config->set('Core.LexerImpl', 'AsdfAsdf');`
			`$this->expectException(new HTMLPurifier_Exception('Cannot instantiate unrecognized Lexer type AsdfAsdf'));`
			`HTMLPurifier_Lexer::create($this->config);`
			`}`

			`public function test_create_incompatibleLexer()`
			`{`
			`$this->config->set('Core.LexerImpl', 'DOMLex');`
			`$this->config->set('Core.MaintainLineNumbers', true);`
			`$this->expectException(new HTMLPurifier_Exception('Cannot use lexer that does not support line numbers with Core.MaintainLineNumbers or Core.CollectErrors (use DirectLex instead)'));`
			`HTMLPurifier_Lexer::create($this->config);`
			`}`

			`// HTMLPurifier_Lexer->parseData() -----------------------------------------`

			`public function assertParseData($input, $expect = true)`
			`{`
			`if ($expect === true) $expect = $input;`
			`$lexer = new HTMLPurifier_Lexer();`
			`$this->assertIdentical($expect, $lexer->parseData($input));`
			`}`

			`public function test_parseData_plainText()`
			`{`
			`$this->assertParseData('asdf');`
			`}`

			`public function test_parseData_ampersandEntity()`
			`{`
			`$this->assertParseData('&', '&');`
			`}`

			`public function test_parseData_quotEntity()`
			`{`
			`$this->assertParseData('"', '"');`
			`}`

			`public function test_parseData_aposNumericEntity()`
			`{`
			`$this->assertParseData(''', "'");`
			`}`

			`public function test_parseData_aposCompactNumericEntity()`
			`{`
			`$this->assertParseData(''', "'");`
			`}`

			`public function test_parseData_adjacentAmpersandEntities()`
			`{`
			`$this->assertParseData('&&&', '&&&');`
			`}`

			`public function test_parseData_trailingUnescapedAmpersand()`
			`{`
			`$this->assertParseData('&&', '&&');`
			`}`

			`public function test_parseData_internalUnescapedAmpersand()`
			`{`
			`$this->assertParseData('Procter & Gamble');`
			`}`

			`public function test_parseData_improperEntityFaultToleranceTest()`
			`{`
			`$this->assertParseData('-');`
			`}`

			`// HTMLPurifier_Lexer->extractBody() ---------------------------------------`

			`public function assertExtractBody($text, $extract = true)`
			`{`
			`$lexer = new HTMLPurifier_Lexer();`
			`$result = $lexer->extractBody($text);`
			`if ($extract === true) $extract = $text;`
			`$this->assertIdentical($extract, $result);`
			`}`

			`public function test_extractBody_noBodyTags()`
			`{`
			`$this->assertExtractBody('<b>Bold</b>');`
			`}`

			`public function test_extractBody_lowercaseBodyTags()`
			`{`
			`$this->assertExtractBody('<html><body><b>Bold</b></body></html>', '<b>Bold</b>');`
			`}`

			`public function test_extractBody_uppercaseBodyTags()`
			`{`
			`$this->assertExtractBody('<HTML><BODY><B>Bold</B></BODY></HTML>', '<B>Bold</B>');`
			`}`

			`public function test_extractBody_realisticUseCase()`
			`{`
			`$this->assertExtractBody(`
			`'<?xml version="1.0"`
			`<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"`
			`"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">`
			`<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">`
			`<head>`
			`<title>xyz</title>`
			`</head>`
			`<body>`
			`<form method="post" action="whatever1">`
			`<div>`
			`<input type="text" name="username" />`
			`<input type="text" name="password" />`
			`<input type="submit" />`
			`</div>`
			`</form>`
			`</body>`
			`</html>',`
			`'`
			`<form method="post" action="whatever1">`
			`<div>`
			`<input type="text" name="username" />`
			`<input type="text" name="password" />`
			`<input type="submit" />`
			`</div>`
			`</form>`
			`');`
			`}`

			`public function test_extractBody_bodyWithAttributes()`
			`{`
			`$this->assertExtractBody('<html><body bgcolor="#F00"><b>Bold</b></body></html>', '<b>Bold</b>');`
			`}`

			`public function test_extractBody_preserveUnclosedBody()`
			`{`
			`$this->assertExtractBody('<body>asdf'); // not closed, don't accept`
			`}`

			`public function test_extractBody_useLastBody()`
			`{`
			`$this->assertExtractBody('<body>foo</body>bar</body>', 'foo</body>bar');`
			`}`

			`// HTMLPurifier_Lexer->tokenizeHTML() --------------------------------------`

			`public function assertTokenization($input, $expect, $alt_expect = array())`
			`{`
			`$lexers = array();`
			`$lexers['DirectLex'] = new HTMLPurifier_Lexer_DirectLex();`
			`if (class_exists('DOMDocument')) {`
			`$lexers['DOMLex'] = new HTMLPurifier_Lexer_DOMLex();`
			`$lexers['PH5P'] = new HTMLPurifier_Lexer_PH5P();`
			`}`
			`foreach ($lexers as $name => $lexer) {`
			`$result = $lexer->tokenizeHTML($input, $this->config, $this->context);`
			`if (isset($alt_expect[$name])) {`
			`if ($alt_expect[$name] === false) continue;`
			`$t_expect = $alt_expect[$name];`
			`$this->assertIdentical($result, $alt_expect[$name], "$name: %s");`
			`} else {`
			`$t_expect = $expect;`
			`$this->assertIdentical($result, $expect, "$name: %s");`
			`}`
			`if ($t_expect != $result) {`
			`printTokens($result);`
			`}`
			`}`
			`}`

			`public function test_tokenizeHTML_emptyInput()`
			`{`
			`$this->assertTokenization('', array());`
			`}`

			`public function test_tokenizeHTML_plainText()`
			`{`
			`$this->assertTokenization(`
			`'This is regular text.',`
			`array(`
			`new HTMLPurifier_Token_Text('This is regular text.')`
			`)`
			`);`
			`}`

			`public function test_tokenizeHTML_textAndTags()`
			`{`
			`$this->assertTokenization(`
			`'This is <b>bold</b> text',`
			`array(`
			`new HTMLPurifier_Token_Text('This is '),`
			`new HTMLPurifier_Token_Start('b', array()),`
			`new HTMLPurifier_Token_Text('bold'),`
			`new HTMLPurifier_Token_End('b'),`
			`new HTMLPurifier_Token_Text(' text'),`
			`)`
			`);`
			`}`

			`public function test_tokenizeHTML_normalizeCase()`
			`{`
			`$this->assertTokenization(`
			`'<DIV>Totally rad dude. <b>asdf</b></div>',`
			`array(`
			`new HTMLPurifier_Token_Start('DIV', array()),`
			`new HTMLPurifier_Token_Text('Totally rad dude. '),`
			`new HTMLPurifier_Token_Start('b', array()),`
			`new HTMLPurifier_Token_Text('asdf'),`
			`new HTMLPurifier_Token_End('b'),`
			`new HTMLPurifier_Token_End('div'),`
			`)`
			`);`
			`}`

			`public function test_tokenizeHTML_notWellFormed()`
			`{`
			`$this->assertTokenization(`
			`'<asdf></asdf><d></d><poOloka><poolasdf><ds></asdf></ASDF>',`
			`array(`
			`new HTMLPurifier_Token_Start('asdf'),`
			`new HTMLPurifier_Token_End('asdf'),`
			`new HTMLPurifier_Token_Start('d'),`
			`new HTMLPurifier_Token_End('d'),`
			`new HTMLPurifier_Token_Start('poOloka'),`
			`new HTMLPurifier_Token_Start('poolasdf'),`
			`new HTMLPurifier_Token_Start('ds'),`
			`new HTMLPurifier_Token_End('asdf'),`
			`new HTMLPurifier_Token_End('ASDF'),`
			`),`
			`array(`
			`'DOMLex' => $alt = array(`
			`new HTMLPurifier_Token_Empty('asdf'),`
			`new HTMLPurifier_Token_Empty('d'),`
			`new HTMLPurifier_Token_Start('pooloka'),`
			`new HTMLPurifier_Token_Start('poolasdf'),`
			`new HTMLPurifier_Token_Empty('ds'),`
			`new HTMLPurifier_Token_End('poolasdf'),`
			`new HTMLPurifier_Token_End('pooloka'),`
			`),`
			`'PH5P' => $alt,`
			`)`
			`);`
			`}`

			`public function test_tokenizeHTML_whitespaceInTag()`
			`{`
			`$this->assertTokenization(`
			`'<a'."\t".'href="foobar.php"'."\n".'title="foo!">Link to <b id="asdf">foobar</b></a>',`
			`array(`
			`new HTMLPurifier_Token_Start('a',array('href'=>'foobar.php','title'=>'foo!')),`
			`new HTMLPurifier_Token_Text('Link to '),`
			`new HTMLPurifier_Token_Start('b',array('id'=>'asdf')),`
			`new HTMLPurifier_Token_Text('foobar'),`
			`new HTMLPurifier_Token_End('b'),`
			`new HTMLPurifier_Token_End('a'),`
			`)`
			`);`
			`}`

			`public function test_tokenizeHTML_singleAttribute()`
			`{`
			`$this->assertTokenization(`
			`'<br style="&" />',`
			`array(`
			`new HTMLPurifier_Token_Empty('br', array('style' => '&'))`
			`)`
			`);`
			`}`

			`public function test_tokenizeHTML_emptyTag()`
			`{`
			`$this->assertTokenization(`
			`'<br />',`
			`array( new HTMLPurifier_Token_Empty('br') )`
			`);`
			`}`

			`public function test_tokenizeHTML_comment()`
			`{`
			`$this->assertTokenization(`
			`'<!-- Comment -->',`
			`array( new HTMLPurifier_Token_Comment(' Comment ') )`
			`);`
			`}`

			`public function test_tokenizeHTML_malformedComment()`
			`{`
			`$this->assertTokenization(`
			`'<!-- not so well formed --->',`
			`array( new HTMLPurifier_Token_Comment(' not so well formed -') )`
			`);`
			`}`

			`public function test_tokenizeHTML_unterminatedTag()`
			`{`
			`$this->assertTokenization(`
			`'<a href=""',`
			`array( new HTMLPurifier_Token_Text('<a href=""') ),`
			`array(`
			`// I like our behavior better, but it's non-standard`
			`'DOMLex' => array( new HTMLPurifier_Token_Empty('a', array('href'=>'')) ),`
			`'PH5P' => false, // total barfing, grabs scaffolding too`
			`)`
			`);`
			`}`

			`public function test_tokenizeHTML_specialEntities()`
			`{`
			`$this->assertTokenization(`
			`'<b>',`
			`array(`
			`new HTMLPurifier_Token_Text('<b>')`
			`),`
			`array(`
			`// some parsers will separate entities out`
			`'PH5P' => array(`
			`new HTMLPurifier_Token_Text('<'),`
			`new HTMLPurifier_Token_Text('b'),`
			`new HTMLPurifier_Token_Text('>'),`
			`),`
			`)`
			`);`
			`}`

			`public function test_tokenizeHTML_earlyQuote()`
			`{`
			`$this->assertTokenization(`
			`'<a "=>',`
			`array( new HTMLPurifier_Token_Empty('a') ),`
			`array(`
			`// we barf on this input`
			`'DirectLex' => array(`
			`new HTMLPurifier_Token_Start('a', array('"' => ''))`
			`),`
			`'PH5P' => false, // behavior varies; handle this personally`
			`)`
			`);`
			`}`

			`public function test_tokenizeHTML_earlyQuote_PH5P()`
			`{`
			`if (!class_exists('DOMDocument')) return;`
			`$lexer = new HTMLPurifier_Lexer_PH5P();`
			`$result = $lexer->tokenizeHTML('<a "=>', $this->config, $this->context);`
			`if ($this->context->get('PH5PError', true)) {`
			`$this->assertIdentical(array(`
			`new HTMLPurifier_Token_Start('a', array('"' => ''))`
			`), $result);`
			`} else {`
			`$this->assertIdentical(array(`
			`new HTMLPurifier_Token_Empty('a', array('"' => ''))`
			`), $result);`
			`}`
			`}`

			`public function test_tokenizeHTML_unescapedQuote()`
			`{`
			`$this->assertTokenization(`
			`'"',`
			`array( new HTMLPurifier_Token_Text('"') )`
			`);`
			`}`

			`public function test_tokenizeHTML_escapedQuote()`
			`{`
			`$this->assertTokenization(`
			`'"',`
			`array( new HTMLPurifier_Token_Text('"') )`
			`);`
			`}`

			`public function test_tokenizeHTML_cdata()`
			`{`
			`$this->assertTokenization(`
			`'<![CDATA[You <b>can't</b> get me!]]>',`
			`array( new HTMLPurifier_Token_Text('You <b>can't</b> get me!') ),`
			`array(`
			`'PH5P' => array(`
			`new HTMLPurifier_Token_Text('You '),`
			`new HTMLPurifier_Token_Text('<'),`
			`new HTMLPurifier_Token_Text('b'),`
			`new HTMLPurifier_Token_Text('>'),`
			`new HTMLPurifier_Token_Text('can'),`
			`new HTMLPurifier_Token_Text('&'),`
			`new HTMLPurifier_Token_Text('#39;t'),`
			`new HTMLPurifier_Token_Text('<'),`
			`new HTMLPurifier_Token_Text('/b'),`
			`new HTMLPurifier_Token_Text('>'),`
			`new HTMLPurifier_Token_Text(' get me!'),`
			`),`
			`)`
			`);`
			`}`

			`public function test_tokenizeHTML_characterEntity()`
			`{`
			`$this->assertTokenization(`
			`'θ',`
			`array( new HTMLPurifier_Token_Text("\xCE\xB8") )`
			`);`
			`}`

			`public function test_tokenizeHTML_characterEntityInCDATA()`
			`{`
			`$this->assertTokenization(`
			`'<![CDATA[→]]>',`
			`array( new HTMLPurifier_Token_Text("→") ),`
			`array(`
			`'PH5P' => array(`
			`new HTMLPurifier_Token_Text('&'),`
			`new HTMLPurifier_Token_Text('rarr;'),`
			`),`
			`)`
			`);`
			`}`

			`public function test_tokenizeHTML_entityInAttribute()`
			`{`
			`$this->assertTokenization(`
			`'<a href="index.php?title=foo&id=bar">Link</a>',`
			`array(`
			`new HTMLPurifier_Token_Start('a',array('href' => 'index.php?title=foo&id=bar')),`
			`new HTMLPurifier_Token_Text('Link'),`
			`new HTMLPurifier_Token_End('a'),`
			`)`
			`);`
			`}`

			`public function test_tokenizeHTML_preserveUTF8()`
			`{`
			`$this->assertTokenization(`
			`"\xCE\xB8",`
			`array( new HTMLPurifier_Token_Text("\xCE\xB8") )`
			`);`
			`}`

			`public function test_tokenizeHTML_specialEntityInAttribute()`
			`{`
			`$this->assertTokenization(`
			`'<br test="x < 6" />',`
			`array( new HTMLPurifier_Token_Empty('br', array('test' => 'x < 6')) )`
			`);`
			`}`

			`public function test_tokenizeHTML_emoticonProtection()`
			`{`
			`$this->assertTokenization(`
			`'<b>Whoa! <3 That\'s not good >.></b>',`
			`array(`
			`new HTMLPurifier_Token_Start('b'),`
			`new HTMLPurifier_Token_Text('Whoa! '),`
			`new HTMLPurifier_Token_Text('<'),`
			`new HTMLPurifier_Token_Text('3 That\'s not good >.>'),`
			`new HTMLPurifier_Token_End('b')`
			`),`
			`array(`
			`// text is absorbed together`
			`'DOMLex' => array(`
			`new HTMLPurifier_Token_Start('b'),`
			`new HTMLPurifier_Token_Text('Whoa! <3 That\'s not good >.>'),`
			`new HTMLPurifier_Token_End('b'),`
			`),`
			`'PH5P' => array( // interesting grouping`
			`new HTMLPurifier_Token_Start('b'),`
			`new HTMLPurifier_Token_Text('Whoa! '),`
			`new HTMLPurifier_Token_Text('<'),`
			`new HTMLPurifier_Token_Text('3 That\'s not good >.>'),`
			`new HTMLPurifier_Token_End('b'),`
			`),`
			`)`
			`);`
			`}`

			`public function test_tokenizeHTML_commentWithFunkyChars()`
			`{`
			`$this->assertTokenization(`
			`'<!-- This >< comment --><br />',`
			`array(`
			`new HTMLPurifier_Token_Comment(' This >< comment '),`
			`new HTMLPurifier_Token_Empty('br'),`
			`)`
			`);`
			`}`

			`public function test_tokenizeHTML_unterminatedComment()`
			`{`
			`$this->assertTokenization(`
			`'<!-- This >< comment',`
			`array( new HTMLPurifier_Token_Comment(' This >< comment') ),`
			`array(`
			`'DOMLex' => false,`
			`'PH5P' => false,`
			`)`
			`);`
			`}`

			`public function test_tokenizeHTML_scriptCDATAContents()`
			`{`
			`$this->config->set('HTML.Trusted', true);`
			`$this->assertTokenization(`
			`'Foo: <script>alert("<foo>");</script>',`
			`array(`
			`new HTMLPurifier_Token_Text('Foo: '),`
			`new HTMLPurifier_Token_Start('script'),`
			`new HTMLPurifier_Token_Text('alert("<foo>");'),`
			`new HTMLPurifier_Token_End('script'),`
			`),`
			`array(`
			`// PH5P, for some reason, bubbles the script to <head>`
			`'PH5P' => false,`
			`)`
			`);`
			`}`

			`public function test_tokenizeHTML_entitiesInComment()`
			`{`
			`$this->assertTokenization(`
			`'<!-- This comment < < & -->',`
			`array( new HTMLPurifier_Token_Comment(' This comment < < & ') )`
			`);`
			`}`

			`public function test_tokenizeHTML_attributeWithSpecialCharacters()`
			`{`
			`$this->assertTokenization(`
			`'<a href="><>">',`
			`array( new HTMLPurifier_Token_Empty('a', array('href' => '><>')) ),`
			`array(`
			`'DirectLex' => array(`
			`new HTMLPurifier_Token_Start('a', array('href' => '')),`
			`new HTMLPurifier_Token_Text('<'),`
			`new HTMLPurifier_Token_Text('">'),`
			`)`
			`)`
			`);`
			`}`

			`public function test_tokenizeHTML_emptyTagWithSlashInAttribute()`
			`{`
			`$this->assertTokenization(`
			`'<param name="src" value="http://example.com/video.wmv" />',`
			`array( new HTMLPurifier_Token_Empty('param', array('name' => 'src', 'value' => 'http://example.com/video.wmv')) )`
			`);`
			`}`

			`public function test_tokenizeHTML_style()`
			`{`
			`$extra = array(`
			`// PH5P doesn't seem to like style tags`
			`'PH5P' => false,`
			`// DirectLex defers to RemoveForeignElements for textification`
			`'DirectLex' => array(`
			`new HTMLPurifier_Token_Start('style', array('type' => 'text/css')),`
			`new HTMLPurifier_Token_Comment("\ndiv {}\n"),`
			`new HTMLPurifier_Token_End('style'),`
			`),`
			`);`
			`if (!defined('LIBXML_VERSION')) {`
			`// LIBXML_VERSION is missing in early versions of PHP`
			`// prior to 1.30 of php-src/ext/libxml/libxml.c (version-wise,`
			`// this translates to 5.0.x. In such cases, punt the test entirely.`
			`return;`
			`} elseif (LIBXML_VERSION < 20628) {`
			`// libxml's behavior is wrong prior to this version, so make`
			`// appropriate accomodations`
			`$extra['DOMLex'] = $extra['DirectLex'];`
			`}`
			`$this->assertTokenization(`
			`'<style type="text/css"><!--`
			`div {}`
			`--></style>',`
			`array(`
			`new HTMLPurifier_Token_Start('style', array('type' => 'text/css')),`
			`new HTMLPurifier_Token_Text("\ndiv {}\n"),`
			`new HTMLPurifier_Token_End('style'),`
			`),`
			`$extra`
			`);`
			`}`

			`public function test_tokenizeHTML_tagWithAtSignAndExtraGt()`
			`{`
			`$alt_expect = array(`
			`// Technically this is invalid, but it won't be a`
			`// problem with invalid element removal; also, this`
			`// mimics Mozilla's parsing of the tag.`
			`new HTMLPurifier_Token_Start('a@'),`
			`new HTMLPurifier_Token_Text('>'),`
			`);`
			`$this->assertTokenization(`
			`'<a@>>',`
			`array(`
			`new HTMLPurifier_Token_Start('a'),`
			`new HTMLPurifier_Token_Text('>'),`
			`new HTMLPurifier_Token_End('a'),`
			`),`
			`array(`
			`'DirectLex' => $alt_expect,`
			`)`
			`);`
			`}`

			`public function test_tokenizeHTML_emoticonHeart()`
			`{`
			`$this->assertTokenization(`
			`'<br /><3<br />',`
			`array(`
			`new HTMLPurifier_Token_Empty('br'),`
			`new HTMLPurifier_Token_Text('<'),`
			`new HTMLPurifier_Token_Text('3'),`
			`new HTMLPurifier_Token_Empty('br'),`
			`),`
			`array(`
			`'DOMLex' => array(`
			`new HTMLPurifier_Token_Empty('br'),`
			`new HTMLPurifier_Token_Text('<3'),`
			`new HTMLPurifier_Token_Empty('br'),`
			`),`
			`)`
			`);`
			`}`

			`public function test_tokenizeHTML_emoticonShiftyEyes()`
			`{`
			`$this->assertTokenization(`
			`'<b><<</b>',`
			`array(`
			`new HTMLPurifier_Token_Start('b'),`
			`new HTMLPurifier_Token_Text('<'),`
			`new HTMLPurifier_Token_Text('<'),`
			`new HTMLPurifier_Token_End('b'),`
			`),`
			`array(`
			`'DOMLex' => array(`
			`new HTMLPurifier_Token_Start('b'),`
			`new HTMLPurifier_Token_Text('<<'),`
			`new HTMLPurifier_Token_End('b'),`
			`),`
			`)`
			`);`
			`}`

			`public function test_tokenizeHTML_eon1996()`
			`{`
			`$this->assertTokenization(`
			`'< <b>test</b>',`
			`array(`
			`new HTMLPurifier_Token_Text('<'),`
			`new HTMLPurifier_Token_Text(' '),`
			`new HTMLPurifier_Token_Start('b'),`
			`new HTMLPurifier_Token_Text('test'),`
			`new HTMLPurifier_Token_End('b'),`
			`),`
			`array(`
			`'DOMLex' => array(`
			`new HTMLPurifier_Token_Text('< '),`
			`new HTMLPurifier_Token_Start('b'),`
			`new HTMLPurifier_Token_Text('test'),`
			`new HTMLPurifier_Token_End('b'),`
			`),`
			`)`
			`);`
			`}`

			`public function test_tokenizeHTML_bodyInCDATA()`
			`{`
			`$alt_tokens = array(`
			`new HTMLPurifier_Token_Text('<'),`
			`new HTMLPurifier_Token_Text('body'),`
			`new HTMLPurifier_Token_Text('>'),`
			`new HTMLPurifier_Token_Text('Foo'),`
			`new HTMLPurifier_Token_Text('<'),`
			`new HTMLPurifier_Token_Text('/body'),`
			`new HTMLPurifier_Token_Text('>'),`
			`);`
			`$this->assertTokenization(`
			`'<![CDATA[<body>Foo</body>]]>',`
			`array(`
			`new HTMLPurifier_Token_Text('<body>Foo</body>'),`
			`),`
			`array(`
			`'PH5P' => $alt_tokens,`
			`)`
			`);`
			`}`

			`public function test_tokenizeHTML_()`
			`{`
			`$this->assertTokenization(`
			`'<a><img /></a>',`
			`array(`
			`new HTMLPurifier_Token_Start('a'),`
			`new HTMLPurifier_Token_Empty('img'),`
			`new HTMLPurifier_Token_End('a'),`
			`)`
			`);`
			`}`

			`public function test_tokenizeHTML_ignoreIECondComment()`
			`{`
			`$this->assertTokenization(`
			`'<!--[if IE]>foo<a>bar<!-- baz --><![endif]-->',`
			`array()`
			`);`
			`}`

			`public function test_tokenizeHTML_removeProcessingInstruction()`
			`{`
			`$this->config->set('Core.RemoveProcessingInstructions', true);`
			`$this->assertTokenization(`
			`'<?xml blah blah ?>',`
			`array()`
			`);`
			`}`

			`public function test_tokenizeHTML_removeNewline()`
			`{`
			`$this->config->set('Core.NormalizeNewlines', true);`
			`$this->assertTokenization(`
			`"plain\rtext\r\n",`
			`array(`
			`new HTMLPurifier_Token_Text("plain\ntext\n")`
			`)`
			`);`
			`}`

			`public function test_tokenizeHTML_noRemoveNewline()`
			`{`
			`$this->config->set('Core.NormalizeNewlines', false);`
			`$this->assertTokenization(`
			`"plain\rtext\r\n",`
			`array(`
			`new HTMLPurifier_Token_Text("plain\rtext\r\n")`
			`)`
			`);`
			`}`

			`public function test_tokenizeHTML_conditionalCommentUngreedy()`
			`{`
			`$this->assertTokenization(`
			`'<!--[if gte mso 9]>a<![endif]-->b<!--[if gte mso 9]>c<![endif]-->',`
			`array(`
			`new HTMLPurifier_Token_Text("b")`
			`)`
			`);`
			`}`

			`public function test_tokenizeHTML_imgTag()`
			`{`
			`$start = array(`
			`new HTMLPurifier_Token_Start('img',`
			`array(`
			`'src' => 'img_11775.jpg',`
			`'alt' => '[Img #11775]',`
			`'id' => 'EMBEDDED_IMG_11775',`
			`)`
			`)`
			`);`
			`$this->assertTokenization(`
			`'<img src="img_11775.jpg" alt="[Img #11775]" id="EMBEDDED_IMG_11775" >',`
			`array(`
			`new HTMLPurifier_Token_Empty('img',`
			`array(`
			`'src' => 'img_11775.jpg',`
			`'alt' => '[Img #11775]',`
			`'id' => 'EMBEDDED_IMG_11775',`
			`)`
			`)`
			`),`
			`array(`
			`'DirectLex' => $start,`
			`)`
			`);`
			`}`


			`/*`

			`public function test_tokenizeHTML_()`
			`{`
			`$this->assertTokenization(`
			`,`
			`array(`

			`)`
			`);`
			`}`
			`*/`

			`}`

			`// vim: et sw=4 sts=4`