about | summary | refs | log | tree | commit | diff | stats | homepage
path: root/Lib/html/parser.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/html/parser.py')
-rw-r--r-- Lib/html/parser.py 18
1 files changed, 17 insertions, 1 deletions
diff --git a/Lib/html/parser.py b/Lib/html/parser.py
index cc15de07b5b..9b4f0959913 100644
--- a/Lib/html/parser.py
+++ b/Lib/html/parser.py
@@ -29,7 +29,8 @@ attr_charref = re.compile(r'&(#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*)[;=
starttagopen = re.compile('<[a-zA-Z]')
endtagopen = re.compile('</[a-zA-Z]')
piclose = re.compile('>')
-commentclose = re.compile(r'--\s*>')
+# Regular comment terminator: '-->' or '--!>'.
+commentclose = re.compile(r'--!?>')
+# Abrupt terminator: '>' or '->'; parse_comment() only tries it directly
+# after the '<!--' opener.
+commentabruptclose = re.compile(r'-?>')
# Note:
# 1) if you change tagfind/attrfind remember to update locatetagend too;
# 2) if you change tagfind/attrfind and/or locatetagend the parser will
@@ -336,6 +337,21 @@ class HTMLParser(_markupbase.ParserBase):
else:
return self.parse_bogus_comment(i)
+ # Internal -- parse comment, return the index just past its terminator,
+ # or -1 if not terminated
+ # see https://html.spec.whatwg.org/multipage/parsing.html#comment-start-state
+ def parse_comment(self, i, report=True):
+ rawdata = self.rawdata
+ assert rawdata.startswith('<!--', i), 'unexpected call to parse_comment()'
+ # First look for a regular '-->' / '--!>' terminator anywhere after '<!--'.
+ match = commentclose.search(rawdata, i+4)
+ if not match:
+ # None found: accept an abrupt '>' or '->' only directly after '<!--'.
+ match = commentabruptclose.match(rawdata, i+4)
+ if not match:
+ return -1
+ if report:
+ # The reported text excludes both the '<!--' opener and the terminator.
+ j = match.start()
+ self.handle_comment(rawdata[i+4: j])
+ return match.end()
+
# Internal -- parse bogus comment, return length or -1 if not terminated
# see https://html.spec.whatwg.org/multipage/parsing.html#bogus-comment-state
def parse_bogus_comment(self, i, report=1):