diff options
Diffstat (limited to 'Lib/test/test_htmlparser.py')
-rw-r--r-- | Lib/test/test_htmlparser.py | 91 |
1 files changed, 74 insertions, 17 deletions
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index 4fdba06cf4c..61fa24fab57 100644 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -317,6 +317,16 @@ text ("endtag", element_lower)], collector=Collector(convert_charrefs=False)) + def test_EOF_in_cdata(self): + content = """<!-- not a comment --> ¬-an-entity-ref; + <a href="" /> </p><p> <span></span></style> + '</script' + '>'""" + s = f'<script>{content}' + self._run_check(s, [ + ("starttag", 'script', []), + ("data", content) + ]) + def test_comments(self): html = ("<!-- I'm a valid comment -->" '<!--me too!-->' @@ -566,12 +576,33 @@ text for html, expected in data: self._run_check(html, expected) - def test_broken_comments(self): + def test_EOF_in_comments_or_decls(self): + data = [ + ('<!', [('data', '<!')]), + ('<!-', [('data', '<!-')]), + ('<!--', [('data', '<!--')]), + ('<![', [('data', '<![')]), + ('<![CDATA[', [('data', '<![CDATA[')]), + ('<![CDATA[x', [('data', '<![CDATA[x')]), + ('<!DOCTYPE', [('data', '<!DOCTYPE')]), + ('<!DOCTYPE HTML', [('data', '<!DOCTYPE HTML')]), + ] + for html, expected in data: + self._run_check(html, expected) + def test_bogus_comments(self): html = ('<! not really a comment >' '<! not a comment either -->' '<! -- close enough -->' '<!><!<-- this was an empty comment>' - '<!!! another bogus comment !!!>') + '<!!! another bogus comment !!!>' + # see #32876 + '<![with square brackets]!>' + '<![\nmultiline\nbogusness\n]!>' + '<![more brackets]-[and a hyphen]!>' + '<![cdata[should be uppercase]]>' + '<![CDATA [whitespaces are not ignored]]>' + '<![CDATA]]>' # required '[' after CDATA + ) expected = [ ('comment', ' not really a comment '), ('comment', ' not a comment either --'), @@ -579,39 +610,65 @@ text ('comment', ''), ('comment', '<-- this was an empty comment'), ('comment', '!! another bogus comment !!!'), + ('comment', '[with square brackets]!'), + ('comment', '[\nmultiline\nbogusness\n]!'), + ('comment', '[more brackets]-[and a hyphen]!'), + ('comment', '[cdata[should be uppercase]]'), + ('comment', '[CDATA [whitespaces are not ignored]]'), + ('comment', '[CDATA]]'), ] self._run_check(html, expected) def test_broken_condcoms(self): # these condcoms are missing the '--' after '<!' and before the '>' + # and they are considered bogus comments according to + # "8.2.4.42. Markup declaration open state" html = ('<![if !(IE)]>broken condcom<![endif]>' '<![if ! IE]><link href="favicon.tiff"/><![endif]>' '<![if !IE 6]><img src="firefox.png" /><![endif]>' '<![if !ie 6]><b>foo</b><![endif]>' '<![if (!IE)|(lt IE 9)]><img src="mammoth.bmp" /><![endif]>') - # According to the HTML5 specs sections "8.2.4.44 Bogus comment state" - # and "8.2.4.45 Markup declaration open state", comment tokens should - # be emitted instead of 'unknown decl', but calling unknown_decl - # provides more flexibility. - # See also Lib/_markupbase.py:parse_declaration expected = [ - ('unknown decl', 'if !(IE)'), + ('comment', '[if !(IE)]'), ('data', 'broken condcom'), - ('unknown decl', 'endif'), - ('unknown decl', 'if ! IE'), + ('comment', '[endif]'), + ('comment', '[if ! IE]'), ('startendtag', 'link', [('href', 'favicon.tiff')]), - ('unknown decl', 'endif'), - ('unknown decl', 'if !IE 6'), + ('comment', '[endif]'), + ('comment', '[if !IE 6]'), ('startendtag', 'img', [('src', 'firefox.png')]), - ('unknown decl', 'endif'), - ('unknown decl', 'if !ie 6'), + ('comment', '[endif]'), + ('comment', '[if !ie 6]'), ('starttag', 'b', []), ('data', 'foo'), ('endtag', 'b'), - ('unknown decl', 'endif'), - ('unknown decl', 'if (!IE)|(lt IE 9)'), + ('comment', '[endif]'), + ('comment', '[if (!IE)|(lt IE 9)]'), ('startendtag', 'img', [('src', 'mammoth.bmp')]), - ('unknown decl', 'endif') + ('comment', '[endif]') + ] + self._run_check(html, expected) + + def test_cdata_declarations(self): + # More tests should be added. See also "8.2.4.42. Markup + # declaration open state", "8.2.4.69. CDATA section state", + # and issue 32876 + html = ('<![CDATA[just some plain text]]>') + expected = [('unknown decl', 'CDATA[just some plain text')] + self._run_check(html, expected) + + def test_cdata_declarations_multiline(self): + html = ('<code><![CDATA[' + ' if (a < b && a > b) {' + ' printf("[<marquee>How?</marquee>]");' + ' }' + ']]></code>') + expected = [ + ('starttag', 'code', []), + ('unknown decl', + 'CDATA[ if (a < b && a > b) { ' + 'printf("[<marquee>How?</marquee>]"); }'), + ('endtag', 'code') ] self._run_check(html, expected) |