From 029acfb922bdd25d6e38c864895c6cc66db76d13 Mon Sep 17 00:00:00 2001 From: Fred Drake Date: Mon, 20 Aug 2001 21:24:19 +0000 Subject: Deal more appropriately with bare ampersands and pointy brackets; this module has to deal with "class" HTML-as-deployed as well as XHTML, so we cannot be as strict as XHTML allows. This closes SF bug #453059, but uses a different fix than suggested in the bug comments. --- Lib/test/test_htmlparser.py | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) (limited to 'Lib/test/test_htmlparser.py') diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index e0e212c27a5..bb6e0b0fe52 100755 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -1,6 +1,7 @@ """Tests for HTMLParser.py.""" import HTMLParser +import pprint import sys import test_support import unittest @@ -83,9 +84,10 @@ class TestCaseBase(unittest.TestCase): for c in self.epilogue: parser.feed(c) parser.close() - self.assert_(parser.get_events() == - self.initial_events + events + self.final_events, - parser.get_events()) + events = parser.get_events() + self.assertEqual(events, + self.initial_events + events + self.final_events, + "got events:\n" + pprint.pformat(events)) def _run_check_extra(self, source, events): self._run_check(source, events, EventCollectorExtra) @@ -137,6 +139,18 @@ text ("data", "\n"), ]) + def test_doctype_decl(self): + inside = """\ +DOCTYPE html [ + + + +]""" + self._run_check("" % inside, [ + ("decl", inside), + ]) + def test_bad_nesting(self): # Strangely, this *is* supposed to test that overlapping # elements are allowed. HTMLParser is more geared toward @@ -148,6 +162,16 @@ text ("endtag", "b"), ]) + def test_bare_ampersands(self): + self._run_check("this text & contains & ampersands &", [ + ("data", "this text & contains & ampersands &"), + ]) + + def test_bare_pointy_brackets(self): + self._run_check("this < text > contains < bare>pointy< brackets", [ + ("data", "this < text > contains < bare>pointy< brackets"), + ]) + def test_attr_syntax(self): output = [ ("starttag", "a", [("b", "v"), ("c", "v"), ("d", "v"), ("e", None)]) @@ -199,16 +223,12 @@ text self._run_check([""], output) def test_starttag_junk_chars(self): - self._parse_error("<") - self._parse_error("<>") self._parse_error("") self._parse_error("") self._parse_error("") self._parse_error("") - self._parse_error("<$") - self._parse_error("<$>") self._parse_error("") self._parse_error("