diff options
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/_pyio.py | 80 | ||||
-rw-r--r-- | Lib/asyncio/base_events.py | 65 | ||||
-rw-r--r-- | Lib/encodings/idna.py | 2 | ||||
-rw-r--r-- | Lib/fractions.py | 3 | ||||
-rw-r--r-- | Lib/html/parser.py | 161 | ||||
-rw-r--r-- | Lib/test/test_asyncio/test_base_events.py | 29 | ||||
-rw-r--r-- | Lib/test/test_capi/test_abstract.py | 25 | ||||
-rw-r--r-- | Lib/test/test_capi/test_opt.py | 15 | ||||
-rw-r--r-- | Lib/test/test_fractions.py | 7 | ||||
-rw-r--r-- | Lib/test/test_free_threading/test_io.py | 42 | ||||
-rw-r--r-- | Lib/test/test_generated_cases.py | 47 | ||||
-rw-r--r-- | Lib/test/test_getpath.py | 21 | ||||
-rw-r--r-- | Lib/test/test_htmlparser.py | 187 | ||||
-rw-r--r-- | Lib/test/test_io.py | 1 | ||||
-rw-r--r-- | Lib/test/test_types.py | 40 | ||||
-rw-r--r-- | Lib/test/test_unittest/test_case.py | 16 | ||||
-rw-r--r-- | Lib/test/test_xml_etree.py | 27 | ||||
-rw-r--r-- | Lib/unittest/_log.py | 5 | ||||
-rw-r--r-- | Lib/unittest/case.py | 6 | ||||
-rw-r--r-- | Lib/xml/etree/ElementTree.py | 10 |
20 files changed, 568 insertions, 221 deletions
diff --git a/Lib/_pyio.py b/Lib/_pyio.py index fb2a6d049ca..5db8ce9244b 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -876,16 +876,28 @@ class BytesIO(BufferedIOBase): _buffer = None def __init__(self, initial_bytes=None): + # Use to keep self._buffer and self._pos consistent. + self._lock = Lock() + buf = bytearray() if initial_bytes is not None: buf += initial_bytes - self._buffer = buf - self._pos = 0 + + with self._lock: + self._buffer = buf + self._pos = 0 def __getstate__(self): if self.closed: raise ValueError("__getstate__ on closed file") - return self.__dict__.copy() + with self._lock: + state = self.__dict__.copy() + del state['_lock'] + return state + + def __setstate__(self, state): + self.__dict__.update(state) + self._lock = Lock() def getvalue(self): """Return the bytes value (contents) of the buffer @@ -918,14 +930,16 @@ class BytesIO(BufferedIOBase): raise TypeError(f"{size!r} is not an integer") else: size = size_index() - if size < 0: - size = len(self._buffer) - if len(self._buffer) <= self._pos: - return b"" - newpos = min(len(self._buffer), self._pos + size) - b = self._buffer[self._pos : newpos] - self._pos = newpos - return bytes(b) + + with self._lock: + if size < 0: + size = len(self._buffer) + if len(self._buffer) <= self._pos: + return b"" + newpos = min(len(self._buffer), self._pos + size) + b = self._buffer[self._pos : newpos] + self._pos = newpos + return bytes(b) def read1(self, size=-1): """This is the same as read. @@ -941,12 +955,14 @@ class BytesIO(BufferedIOBase): n = view.nbytes # Size of any bytes-like object if n == 0: return 0 - pos = self._pos - if pos > len(self._buffer): - # Pad buffer to pos with null bytes. - self._buffer.resize(pos) - self._buffer[pos:pos + n] = b - self._pos += n + + with self._lock: + pos = self._pos + if pos > len(self._buffer): + # Pad buffer to pos with null bytes. + self._buffer.resize(pos) + self._buffer[pos:pos + n] = b + self._pos += n return n def seek(self, pos, whence=0): @@ -963,9 +979,11 @@ class BytesIO(BufferedIOBase): raise ValueError("negative seek position %r" % (pos,)) self._pos = pos elif whence == 1: - self._pos = max(0, self._pos + pos) + with self._lock: + self._pos = max(0, self._pos + pos) elif whence == 2: - self._pos = max(0, len(self._buffer) + pos) + with self._lock: + self._pos = max(0, len(self._buffer) + pos) else: raise ValueError("unsupported whence value") return self._pos @@ -978,18 +996,20 @@ class BytesIO(BufferedIOBase): def truncate(self, pos=None): if self.closed: raise ValueError("truncate on closed file") - if pos is None: - pos = self._pos - else: - try: - pos_index = pos.__index__ - except AttributeError: - raise TypeError(f"{pos!r} is not an integer") + + with self._lock: + if pos is None: + pos = self._pos else: - pos = pos_index() - if pos < 0: - raise ValueError("negative truncate position %r" % (pos,)) - del self._buffer[pos:] + try: + pos_index = pos.__index__ + except AttributeError: + raise TypeError(f"{pos!r} is not an integer") + else: + pos = pos_index() + if pos < 0: + raise ValueError("negative truncate position %r" % (pos,)) + del self._buffer[pos:] return pos def readable(self): diff --git a/Lib/asyncio/base_events.py b/Lib/asyncio/base_events.py index 2ff9e4017bb..520d4b39854 100644 --- a/Lib/asyncio/base_events.py +++ b/Lib/asyncio/base_events.py @@ -1016,38 +1016,43 @@ class BaseEventLoop(events.AbstractEventLoop): family, type_, proto, _, address = addr_info sock = None try: - sock = socket.socket(family=family, type=type_, proto=proto) - sock.setblocking(False) - if local_addr_infos is not None: - for lfamily, _, _, _, laddr in local_addr_infos: - # skip local addresses of different family - if lfamily != family: - continue - try: - sock.bind(laddr) - break - except OSError as exc: - msg = ( - f'error while attempting to bind on ' - f'address {laddr!r}: {str(exc).lower()}' - ) - exc = OSError(exc.errno, msg) - my_exceptions.append(exc) - else: # all bind attempts failed - if my_exceptions: - raise my_exceptions.pop() - else: - raise OSError(f"no matching local address with {family=} found") - await self.sock_connect(sock, address) - return sock - except OSError as exc: - my_exceptions.append(exc) - if sock is not None: - sock.close() - raise + try: + sock = socket.socket(family=family, type=type_, proto=proto) + sock.setblocking(False) + if local_addr_infos is not None: + for lfamily, _, _, _, laddr in local_addr_infos: + # skip local addresses of different family + if lfamily != family: + continue + try: + sock.bind(laddr) + break + except OSError as exc: + msg = ( + f'error while attempting to bind on ' + f'address {laddr!r}: {str(exc).lower()}' + ) + exc = OSError(exc.errno, msg) + my_exceptions.append(exc) + else: # all bind attempts failed + if my_exceptions: + raise my_exceptions.pop() + else: + raise OSError(f"no matching local address with {family=} found") + await self.sock_connect(sock, address) + return sock + except OSError as exc: + my_exceptions.append(exc) + raise except: if sock is not None: - sock.close() + try: + sock.close() + except OSError: + # An error when closing a newly created socket is + # not important, but it can overwrite more important + # non-OSError error. So ignore it. + pass raise finally: exceptions = my_exceptions = None diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py index 60a8d5eb227..0c90b4c9fe1 100644 --- a/Lib/encodings/idna.py +++ b/Lib/encodings/idna.py @@ -316,7 +316,7 @@ class IncrementalEncoder(codecs.BufferedIncrementalEncoder): class IncrementalDecoder(codecs.BufferedIncrementalDecoder): def _buffer_decode(self, input, errors, final): if errors != 'strict': - raise UnicodeError("Unsupported error handling: {errors}") + raise UnicodeError(f"Unsupported error handling: {errors}") if not input: return ("", 0) diff --git a/Lib/fractions.py b/Lib/fractions.py index cb05ae7c200..a8c67068522 100644 --- a/Lib/fractions.py +++ b/Lib/fractions.py @@ -504,6 +504,9 @@ class Fraction(numbers.Rational): trim_point = not alternate_form exponent_indicator = "E" if presentation_type in "EFG" else "e" + if align == '=' and fill == '0': + zeropad = True + # Round to get the digits we need, figure out where to place the point, # and decide whether to use scientific notation. 'point_pos' is the # relative to the _end_ of the digit string: that is, it's the number diff --git a/Lib/html/parser.py b/Lib/html/parser.py index ba416e7fa6e..9b4f0959913 100644 --- a/Lib/html/parser.py +++ b/Lib/html/parser.py @@ -29,17 +29,46 @@ attr_charref = re.compile(r'&(#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*)[;= starttagopen = re.compile('<[a-zA-Z]') endtagopen = re.compile('</[a-zA-Z]') piclose = re.compile('>') -commentclose = re.compile(r'--\s*>') +commentclose = re.compile(r'--!?>') +commentabruptclose = re.compile(r'-?>') # Note: -# 1) if you change tagfind/attrfind remember to update locatestarttagend too; -# 2) if you change tagfind/attrfind and/or locatestarttagend the parser will +# 1) if you change tagfind/attrfind remember to update locatetagend too; +# 2) if you change tagfind/attrfind and/or locatetagend the parser will # explode, so don't do it. -# see http://www.w3.org/TR/html5/tokenization.html#tag-open-state -# and http://www.w3.org/TR/html5/tokenization.html#tag-name-state -tagfind_tolerant = re.compile(r'([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*') -attrfind_tolerant = re.compile( - r'((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*' - r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*') +# see the HTML5 specs section "13.2.5.6 Tag open state", +# "13.2.5.8 Tag name state" and "13.2.5.33 Attribute name state". +# https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state +# https://html.spec.whatwg.org/multipage/parsing.html#tag-name-state +# https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state +tagfind_tolerant = re.compile(r'([a-zA-Z][^\t\n\r\f />]*)(?:[\t\n\r\f ]|/(?!>))*') +attrfind_tolerant = re.compile(r""" + ( + (?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]* # attribute name + ) + (= # value indicator + ('[^']*' # LITA-enclosed value + |"[^"]*" # LIT-enclosed value + |(?!['"])[^>\t\n\r\f ]* # bare value + ) + )? + (?:[\t\n\r\f ]|/(?!>))* # possibly followed by a space +""", re.VERBOSE) +locatetagend = re.compile(r""" + [a-zA-Z][^\t\n\r\f />]* # tag name + [\t\n\r\f /]* # optional whitespace before attribute name + (?:(?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]* # attribute name + (?:= # value indicator + (?:'[^']*' # LITA-enclosed value + |"[^"]*" # LIT-enclosed value + |(?!['"])[^>\t\n\r\f ]* # bare value + ) + )? + [\t\n\r\f /]* # possibly followed by a space + )* + >? +""", re.VERBOSE) +# The following variables are not used, but are temporarily left for +# backward compatibility. locatestarttagend_tolerant = re.compile(r""" <[a-zA-Z][^\t\n\r\f />\x00]* # tag name (?:[\s/]* # optional whitespace before attribute name @@ -56,8 +85,6 @@ locatestarttagend_tolerant = re.compile(r""" \s* # trailing whitespace """, re.VERBOSE) endendtag = re.compile('>') -# the HTML 5 spec, section 8.1.2.2, doesn't allow spaces between -# </ and the tag name, so maybe this should be fixed endtagfind = re.compile(r'</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>') # Character reference processing logic specific to attribute values @@ -141,7 +168,8 @@ class HTMLParser(_markupbase.ParserBase): def set_cdata_mode(self, elem): self.cdata_elem = elem.lower() - self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I) + self.interesting = re.compile(r'</%s(?=[\t\n\r\f />])' % self.cdata_elem, + re.IGNORECASE|re.ASCII) def clear_cdata_mode(self): self.interesting = interesting_normal @@ -166,7 +194,7 @@ class HTMLParser(_markupbase.ParserBase): # & near the end and see if it's followed by a space or ;. amppos = rawdata.rfind('&', max(i, n-34)) if (amppos >= 0 and - not re.compile(r'[\s;]').search(rawdata, amppos)): + not re.compile(r'[\t\n\r\f ;]').search(rawdata, amppos)): break # wait till we get all the text j = n else: @@ -309,8 +337,23 @@ class HTMLParser(_markupbase.ParserBase): else: return self.parse_bogus_comment(i) + # Internal -- parse comment, return length or -1 if not terminated + # see https://html.spec.whatwg.org/multipage/parsing.html#comment-start-state + def parse_comment(self, i, report=True): + rawdata = self.rawdata + assert rawdata.startswith('<!--', i), 'unexpected call to parse_comment()' + match = commentclose.search(rawdata, i+4) + if not match: + match = commentabruptclose.match(rawdata, i+4) + if not match: + return -1 + if report: + j = match.start() + self.handle_comment(rawdata[i+4: j]) + return match.end() + # Internal -- parse bogus comment, return length or -1 if not terminated - # see http://www.w3.org/TR/html5/tokenization.html#bogus-comment-state + # see https://html.spec.whatwg.org/multipage/parsing.html#bogus-comment-state def parse_bogus_comment(self, i, report=1): rawdata = self.rawdata assert rawdata[i:i+2] in ('<!', '</'), ('unexpected call to ' @@ -336,6 +379,8 @@ class HTMLParser(_markupbase.ParserBase): # Internal -- handle starttag, return end or -1 if not terminated def parse_starttag(self, i): + # See the HTML5 specs section "13.2.5.8 Tag name state" + # https://html.spec.whatwg.org/multipage/parsing.html#tag-name-state self.__starttag_text = None endpos = self.check_for_whole_start_tag(i) if endpos < 0: @@ -381,76 +426,42 @@ class HTMLParser(_markupbase.ParserBase): # or -1 if incomplete. def check_for_whole_start_tag(self, i): rawdata = self.rawdata - m = locatestarttagend_tolerant.match(rawdata, i) - if m: - j = m.end() - next = rawdata[j:j+1] - if next == ">": - return j + 1 - if next == "/": - if rawdata.startswith("/>", j): - return j + 2 - if rawdata.startswith("/", j): - # buffer boundary - return -1 - # else bogus input - if j > i: - return j - else: - return i + 1 - if next == "": - # end of input - return -1 - if next in ("abcdefghijklmnopqrstuvwxyz=/" - "ABCDEFGHIJKLMNOPQRSTUVWXYZ"): - # end of input in or before attribute value, or we have the - # '/' from a '/>' ending - return -1 - if j > i: - return j - else: - return i + 1 - raise AssertionError("we should not get here!") + match = locatetagend.match(rawdata, i+1) + assert match + j = match.end() + if rawdata[j-1] != ">": + return -1 + return j # Internal -- parse endtag, return end or -1 if incomplete def parse_endtag(self, i): + # See the HTML5 specs section "13.2.5.7 End tag open state" + # https://html.spec.whatwg.org/multipage/parsing.html#end-tag-open-state rawdata = self.rawdata assert rawdata[i:i+2] == "</", "unexpected call to parse_endtag" - match = endendtag.search(rawdata, i+1) # > - if not match: + if rawdata.find('>', i+2) < 0: # fast check return -1 - gtpos = match.end() - match = endtagfind.match(rawdata, i) # </ + tag + > - if not match: - if self.cdata_elem is not None: - self.handle_data(rawdata[i:gtpos]) - return gtpos - # find the name: w3.org/TR/html5/tokenization.html#tag-name-state - namematch = tagfind_tolerant.match(rawdata, i+2) - if not namematch: - # w3.org/TR/html5/tokenization.html#end-tag-open-state - if rawdata[i:i+3] == '</>': - return i+3 - else: - return self.parse_bogus_comment(i) - tagname = namematch.group(1).lower() - # consume and ignore other stuff between the name and the > - # Note: this is not 100% correct, since we might have things like - # </tag attr=">">, but looking for > after the name should cover - # most of the cases and is much simpler - gtpos = rawdata.find('>', namematch.end()) - self.handle_endtag(tagname) - return gtpos+1 + if not endtagopen.match(rawdata, i): # </ + letter + if rawdata[i+2:i+3] == '>': # </> is ignored + # "missing-end-tag-name" parser error + return i+3 + else: + return self.parse_bogus_comment(i) - elem = match.group(1).lower() # script or style - if self.cdata_elem is not None: - if elem != self.cdata_elem: - self.handle_data(rawdata[i:gtpos]) - return gtpos + match = locatetagend.match(rawdata, i+2) + assert match + j = match.end() + if rawdata[j-1] != ">": + return -1 - self.handle_endtag(elem) + # find the name: "13.2.5.8 Tag name state" + # https://html.spec.whatwg.org/multipage/parsing.html#tag-name-state + match = tagfind_tolerant.match(rawdata, i+2) + assert match + tag = match.group(1).lower() + self.handle_endtag(tag) self.clear_cdata_mode() - return gtpos + return j # Overridable -- finish processing of start+end tag: <tag.../> def handle_startendtag(self, tag, attrs): diff --git a/Lib/test/test_asyncio/test_base_events.py b/Lib/test/test_asyncio/test_base_events.py index bb9f366fc41..12179eb0c9e 100644 --- a/Lib/test/test_asyncio/test_base_events.py +++ b/Lib/test/test_asyncio/test_base_events.py @@ -24,6 +24,10 @@ import warnings MOCK_ANY = mock.ANY +class CustomError(Exception): + pass + + def tearDownModule(): asyncio._set_event_loop_policy(None) @@ -1326,6 +1330,31 @@ class BaseEventLoopWithSelectorTests(test_utils.TestCase): self.assertEqual(len(cm.exception.exceptions), 1) self.assertIsInstance(cm.exception.exceptions[0], OSError) + @patch_socket + def test_create_connection_connect_non_os_err_close_err(self, m_socket): + # Test the case when sock_connect() raises non-OSError exception + # and sock.close() raises OSError. + async def getaddrinfo(*args, **kw): + return [(2, 1, 6, '', ('107.6.106.82', 80))] + + def getaddrinfo_task(*args, **kwds): + return self.loop.create_task(getaddrinfo(*args, **kwds)) + + self.loop.getaddrinfo = getaddrinfo_task + self.loop.sock_connect = mock.Mock() + self.loop.sock_connect.side_effect = CustomError + sock = mock.Mock() + m_socket.socket.return_value = sock + sock.close.side_effect = OSError + + coro = self.loop.create_connection(MyProto, 'example.com', 80) + self.assertRaises( + CustomError, self.loop.run_until_complete, coro) + + coro = self.loop.create_connection(MyProto, 'example.com', 80, all_errors=True) + self.assertRaises( + CustomError, self.loop.run_until_complete, coro) + def test_create_connection_multiple(self): async def getaddrinfo(*args, **kw): return [(2, 1, 6, '', ('0.0.0.1', 80)), diff --git a/Lib/test/test_capi/test_abstract.py b/Lib/test/test_capi/test_abstract.py index 7d548ae87c0..3a2ed9f5db8 100644 --- a/Lib/test/test_capi/test_abstract.py +++ b/Lib/test/test_capi/test_abstract.py @@ -1077,6 +1077,31 @@ class CAPITest(unittest.TestCase): with self.assertRaisesRegex(TypeError, regex): PyIter_NextItem(10) + def test_object_setattr_null_exc(self): + class Obj: + pass + obj = Obj() + obj.attr = 123 + + exc = ValueError("error") + with self.assertRaises(SystemError) as cm: + _testcapi.object_setattr_null_exc(obj, 'attr', exc) + self.assertIs(cm.exception.__context__, exc) + self.assertIsNone(cm.exception.__cause__) + self.assertHasAttr(obj, 'attr') + + with self.assertRaises(SystemError) as cm: + _testcapi.object_setattrstring_null_exc(obj, 'attr', exc) + self.assertIs(cm.exception.__context__, exc) + self.assertIsNone(cm.exception.__cause__) + self.assertHasAttr(obj, 'attr') + + with self.assertRaises(SystemError) as cm: + # undecodable name + _testcapi.object_setattrstring_null_exc(obj, b'\xff', exc) + self.assertIs(cm.exception.__context__, exc) + self.assertIsNone(cm.exception.__cause__) + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index e4c9a463855..7be1c9eebb3 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2451,6 +2451,21 @@ class TestUopsOptimization(unittest.TestCase): self.assertNotIn("_GUARD_TOS_FLOAT", uops) self.assertNotIn("_GUARD_NOS_FLOAT", uops) + def test_binary_op_constant_evaluate(self): + def testfunc(n): + for _ in range(n): + 2 ** 65 + + testfunc(TIER2_THRESHOLD) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + # For now... until we constant propagate it away. + self.assertIn("_BINARY_OP", uops) + + def global_identity(x): return x diff --git a/Lib/test/test_fractions.py b/Lib/test/test_fractions.py index d1d2739856c..1875a2f529c 100644 --- a/Lib/test/test_fractions.py +++ b/Lib/test/test_fractions.py @@ -1480,11 +1480,8 @@ class FractionTest(unittest.TestCase): (F('-1234.5678'), '08,.0f', '-001,235'), (F('-1234.5678'), '09,.0f', '-0,001,235'), # Corner-case - zero-padding specified through fill and align - # instead of the zero-pad character - in this case, treat '0' as a - # regular fill character and don't attempt to insert commas into - # the filled portion. This differs from the int and float - # behaviour. - (F('1234.5678'), '0=12,.2f', '00001,234.57'), + # instead of the zero-pad character. + (F('1234.5678'), '0=12,.2f', '0,001,234.57'), # Corner case where it's not clear whether the '0' indicates zero # padding or gives the minimum width, but there's still an obvious # answer to give. We want this to work in case the minimum width diff --git a/Lib/test/test_free_threading/test_io.py b/Lib/test/test_free_threading/test_io.py index f9bec740ddf..41d89e04da8 100644 --- a/Lib/test/test_free_threading/test_io.py +++ b/Lib/test/test_free_threading/test_io.py @@ -1,12 +1,13 @@ +import io +import _pyio as pyio import threading from unittest import TestCase from test.support import threading_helper from random import randint -from io import BytesIO from sys import getsizeof -class TestBytesIO(TestCase): +class ThreadSafetyMixin: # Test pretty much everything that can break under free-threading. # Non-deterministic, but at least one of these things will fail if # BytesIO object is not free-thread safe. @@ -90,20 +91,27 @@ class TestBytesIO(TestCase): barrier.wait() getsizeof(b) - self.check([write] * 10, BytesIO()) - self.check([writelines] * 10, BytesIO()) - self.check([write] * 10 + [truncate] * 10, BytesIO()) - self.check([truncate] + [read] * 10, BytesIO(b'0\n'*204800)) - self.check([truncate] + [read1] * 10, BytesIO(b'0\n'*204800)) - self.check([truncate] + [readline] * 10, BytesIO(b'0\n'*20480)) - self.check([truncate] + [readlines] * 10, BytesIO(b'0\n'*20480)) - self.check([truncate] + [readinto] * 10, BytesIO(b'0\n'*204800), bytearray(b'0\n'*204800)) - self.check([close] + [write] * 10, BytesIO()) - self.check([truncate] + [getvalue] * 10, BytesIO(b'0\n'*204800)) - self.check([truncate] + [getbuffer] * 10, BytesIO(b'0\n'*204800)) - self.check([truncate] + [iter] * 10, BytesIO(b'0\n'*20480)) - self.check([truncate] + [getstate] * 10, BytesIO(b'0\n'*204800)) - self.check([truncate] + [setstate] * 10, BytesIO(b'0\n'*204800), (b'123', 0, None)) - self.check([truncate] + [sizeof] * 10, BytesIO(b'0\n'*204800)) + self.check([write] * 10, self.ioclass()) + self.check([writelines] * 10, self.ioclass()) + self.check([write] * 10 + [truncate] * 10, self.ioclass()) + self.check([truncate] + [read] * 10, self.ioclass(b'0\n'*204800)) + self.check([truncate] + [read1] * 10, self.ioclass(b'0\n'*204800)) + self.check([truncate] + [readline] * 10, self.ioclass(b'0\n'*20480)) + self.check([truncate] + [readlines] * 10, self.ioclass(b'0\n'*20480)) + self.check([truncate] + [readinto] * 10, self.ioclass(b'0\n'*204800), bytearray(b'0\n'*204800)) + self.check([close] + [write] * 10, self.ioclass()) + self.check([truncate] + [getvalue] * 10, self.ioclass(b'0\n'*204800)) + self.check([truncate] + [getbuffer] * 10, self.ioclass(b'0\n'*204800)) + self.check([truncate] + [iter] * 10, self.ioclass(b'0\n'*20480)) + self.check([truncate] + [getstate] * 10, self.ioclass(b'0\n'*204800)) + state = self.ioclass(b'123').__getstate__() + self.check([truncate] + [setstate] * 10, self.ioclass(b'0\n'*204800), state) + self.check([truncate] + [sizeof] * 10, self.ioclass(b'0\n'*204800)) # no tests for seek or tell because they don't break anything + +class CBytesIOTest(ThreadSafetyMixin, TestCase): + ioclass = io.BytesIO + +class PyBytesIOTest(ThreadSafetyMixin, TestCase): + ioclass = pyio.BytesIO diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py index eb01328b6ea..81d4e39f5be 100644 --- a/Lib/test/test_generated_cases.py +++ b/Lib/test/test_generated_cases.py @@ -2398,6 +2398,53 @@ class TestGeneratedAbstractCases(unittest.TestCase): """ self.run_cases_test(input, input2, output) + def test_replace_opcode_escaping_uop_body_copied_in_complex(self): + input = """ + pure op(OP, (foo -- res)) { + if (foo) { + res = ESCAPING_CODE(foo); + } + else { + res = 1; + } + } + """ + input2 = """ + op(OP, (foo -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(foo); + res = sym_new_known(ctx, foo); + } + """ + output = """ + case OP: { + JitOptRef foo; + JitOptRef res; + foo = stack_pointer[-1]; + if ( + sym_is_safe_const(ctx, foo) + ) { + JitOptRef foo_sym = foo; + _PyStackRef foo = sym_get_const_as_stackref(ctx, foo_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + if (foo) { + res_stackref = ESCAPING_CODE(foo); + } + else { + res_stackref = 1; + } + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + stack_pointer[-1] = res; + break; + } + res = sym_new_known(ctx, foo); + stack_pointer[-1] = res; + break; + } + """ + self.run_cases_test(input, input2, output) + def test_replace_opocode_uop_reject_array_effects(self): input = """ pure op(OP, (foo[2] -- res)) { diff --git a/Lib/test/test_getpath.py b/Lib/test/test_getpath.py index f86df9d0d03..83f09f34955 100644 --- a/Lib/test/test_getpath.py +++ b/Lib/test/test_getpath.py @@ -354,6 +354,27 @@ class MockGetPathTests(unittest.TestCase): actual = getpath(ns, expected) self.assertEqual(expected, actual) + def test_venv_posix_without_home_key(self): + ns = MockPosixNamespace( + argv0="/venv/bin/python3", + PREFIX="/usr", + ENV_PATH="/usr/bin", + ) + # Setup the bare minimum venv + ns.add_known_xfile("/usr/bin/python3") + ns.add_known_xfile("/venv/bin/python3") + ns.add_known_link("/venv/bin/python3", "/usr/bin/python3") + ns.add_known_file("/venv/pyvenv.cfg", [ + # home = key intentionally omitted + ]) + expected = dict( + executable="/venv/bin/python3", + prefix="/venv", + base_prefix="/usr", + ) + actual = getpath(ns, expected) + self.assertEqual(expected, actual) + def test_venv_changed_name_posix(self): "Test a venv layout on *nix." ns = MockPosixNamespace( diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index 65a4bee72b9..15cad061889 100644 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -81,6 +81,13 @@ class EventCollectorCharrefs(EventCollector): self.fail('This should never be called with convert_charrefs=True') +# The normal event collector normalizes the events in get_events, +# so we override it to return the original list of events. +class EventCollectorNoNormalize(EventCollector): + def get_events(self): + return self.events + + class TestCaseBase(unittest.TestCase): def get_collector(self): @@ -265,8 +272,7 @@ text ("starttag", "foo:bar", [("one", "1"), ("two", "2")]), ("starttag_text", s)]) - def test_cdata_content(self): - contents = [ + @support.subTests('content', [ '<!-- not a comment --> ¬-an-entity-ref;', "<not a='start tag'>", '<a href="" /> <p> <span></span>', @@ -279,70 +285,127 @@ text 'src="http://www.example.org/r=\'+new ' 'Date().getTime()+\'"><\\/s\'+\'cript>\');\n//]]>'), '\n<!-- //\nvar foo = 3.14;\n// -->\n', - 'foo = "</sty" + "le>";', '<!-- \u2603 -->', - # these two should be invalid according to the HTML 5 spec, - # section 8.1.2.2 - #'foo = </\nscript>', - #'foo = </ script>', - ] - elements = ['script', 'style', 'SCRIPT', 'STYLE', 'Script', 'Style'] - for content in contents: - for element in elements: - element_lower = element.lower() - s = '<{element}>{content}</{element}>'.format(element=element, - content=content) - self._run_check(s, [("starttag", element_lower, []), - ("data", content), - ("endtag", element_lower)]) - - def test_cdata_with_closing_tags(self): + 'foo = "</ script>"', + 'foo = "</scripture>"', + 'foo = "</script\v>"', + 'foo = "</script\xa0>"', + 'foo = "</ſcript>"', + 'foo = "</scrıpt>"', + ]) + def test_script_content(self, content): + s = f'<script>{content}</script>' + self._run_check(s, [("starttag", "script", []), + ("data", content), + ("endtag", "script")]) + + @support.subTests('content', [ + 'a::before { content: "<!-- not a comment -->"; }', + 'a::before { content: "¬-an-entity-ref;"; }', + 'a::before { content: "<not a=\'start tag\'>"; }', + 'a::before { content: "\u2603"; }', + 'a::before { content: "< /style>"; }', + 'a::before { content: "</ style>"; }', + 'a::before { content: "</styled>"; }', + 'a::before { content: "</style\v>"; }', + 'a::before { content: "</style\xa0>"; }', + 'a::before { content: "</ſtyle>"; }', + ]) + def test_style_content(self, content): + s = f'<style>{content}</style>' + self._run_check(s, [("starttag", "style", []), + ("data", content), + ("endtag", "style")]) + + @support.subTests('endtag', ['script', 'SCRIPT', 'script ', 'script\n', + 'script/', 'script foo=bar', 'script foo=">"']) + def test_script_closing_tag(self, endtag): # see issue #13358 # make sure that HTMLParser calls handle_data only once for each CDATA. - # The normal event collector normalizes the events in get_events, - # so we override it to return the original list of events. - class Collector(EventCollector): - def get_events(self): - return self.events - - content = """<!-- not a comment --> ¬-an-entity-ref; - <a href="" /> </p><p> <span></span></style> - '</script' + '>'""" - for element in [' script', 'script ', ' script ', - '\nscript', 'script\n', '\nscript\n']: - element_lower = element.lower().strip() - s = '<script>{content}</{element}>'.format(element=element, - content=content) - self._run_check(s, [("starttag", element_lower, []), - ("data", content), - ("endtag", element_lower)], - collector=Collector(convert_charrefs=False)) - - def test_EOF_in_cdata(self): content = """<!-- not a comment --> ¬-an-entity-ref; <a href="" /> </p><p> <span></span></style> '</script' + '>'""" - s = f'<script>{content}' - self._run_check(s, [ - ("starttag", 'script', []), - ("data", content) - ]) + s = f'<ScrIPt>{content}</{endtag}>' + self._run_check(s, [("starttag", "script", []), + ("data", content), + ("endtag", "script")], + collector=EventCollectorNoNormalize(convert_charrefs=False)) + + @support.subTests('endtag', ['style', 'STYLE', 'style ', 'style\n', + 'style/', 'style foo=bar', 'style foo=">"']) + def test_style_closing_tag(self, endtag): + content = """ + b::before { content: "<!-- not a comment -->"; } + p::before { content: "¬-an-entity-ref;"; } + a::before { content: "<i>"; } + a::after { content: "</i>"; } + """ + s = f'<StyLE>{content}</{endtag}>' + self._run_check(s, [("starttag", "style", []), + ("data", content), + ("endtag", "style")], + collector=EventCollectorNoNormalize(convert_charrefs=False)) + + @support.subTests('tail,end', [ + ('', False), + ('<', False), + ('</', False), + ('</s', False), + ('</script', False), + ('</script ', True), + ('</script foo=bar', True), + ('</script foo=">', True), + ]) + def test_eof_in_script(self, tail, end): + content = "a = 123" + s = f'<ScrIPt>{content}{tail}' + self._run_check(s, [("starttag", "script", []), + ("data", content if end else content + tail)], + collector=EventCollectorNoNormalize(convert_charrefs=False)) def test_comments(self): html = ("<!-- I'm a valid comment -->" '<!--me too!-->' '<!------>' + '<!----->' '<!---->' + # abrupt-closing-of-empty-comment + '<!--->' + '<!-->' '<!----I have many hyphens---->' '<!-- I have a > in the middle -->' - '<!-- and I have -- in the middle! -->') + '<!-- and I have -- in the middle! -->' + '<!--incorrectly-closed-comment--!>' + '<!----!>' + '<!----!-->' + '<!---- >-->' + '<!---!>-->' + '<!--!>-->' + # nested-comment + '<!-- <!-- nested --> -->' + '<!--<!-->' + '<!--<!--!>' + ) expected = [('comment', " I'm a valid comment "), ('comment', 'me too!'), ('comment', '--'), + ('comment', '-'), + ('comment', ''), + ('comment', ''), ('comment', ''), ('comment', '--I have many hyphens--'), ('comment', ' I have a > in the middle '), - ('comment', ' and I have -- in the middle! ')] + ('comment', ' and I have -- in the middle! '), + ('comment', 'incorrectly-closed-comment'), + ('comment', ''), + ('comment', '--!'), + ('comment', '-- >'), + ('comment', '-!>'), + ('comment', '!>'), + ('comment', ' <!-- nested '), ('data', ' -->'), + ('comment', '<!'), + ('comment', '<!'), + ] self._run_check(html, expected) def test_condcoms(self): @@ -443,7 +506,7 @@ text self._run_check("</$>", [('comment', '$')]) self._run_check("</", [('data', '</')]) self._run_check("</a", []) - self._run_check("</ a>", [('endtag', 'a')]) + self._run_check("</ a>", [('comment', ' a')]) self._run_check("</ a", [('comment', ' a')]) self._run_check("<a<a>", [('starttag', 'a<a', [])]) self._run_check("</a<a>", [('endtag', 'a<a')]) @@ -491,6 +554,10 @@ text ] self._run_check(html, expected) + def test_slashes_in_endtag(self): + self._run_check('</a/>', [('endtag', 'a')]) + self._run_check('</a foo="var"/>', [('endtag', 'a')]) + def test_declaration_junk_chars(self): self._run_check("<!DOCTYPE foo $ >", [('decl', 'DOCTYPE foo $ ')]) @@ -525,15 +592,11 @@ text self._run_check(html, expected) def test_broken_invalid_end_tag(self): - # This is technically wrong (the "> shouldn't be included in the 'data') - # but is probably not worth fixing it (in addition to all the cases of - # the previous test, it would require a full attribute parsing). - # see #13993 html = '<b>This</b attr=">"> confuses the parser' expected = [('starttag', 'b', []), ('data', 'This'), ('endtag', 'b'), - ('data', '"> confuses the parser')] + ('data', ' confuses the parser')] self._run_check(html, expected) def test_correct_detection_of_start_tags(self): @@ -560,7 +623,7 @@ text html = '<div style="", foo = "bar" ><b>The <a href="some_url">rain</a>' expected = [ - ('starttag', 'div', [('style', ''), (',', None), ('foo', 'bar')]), + ('starttag', 'div', [('style', ''), (',', None), ('foo', None), ('=', None), ('"bar"', None)]), ('starttag', 'b', []), ('data', 'The '), ('starttag', 'a', [('href', 'some_url')]), @@ -749,9 +812,15 @@ class AttributesTestCase(TestCaseBase): ("starttag", "a", [("b", "v"), ("c", "v"), ("d", "v"), ("e", None)]) ] self._run_check("""<a b='v' c="v" d=v e>""", output) - self._run_check("""<a b = 'v' c = "v" d = v e>""", output) - self._run_check("""<a\nb\n=\n'v'\nc\n=\n"v"\nd\n=\nv\ne>""", output) - self._run_check("""<a\tb\t=\t'v'\tc\t=\t"v"\td\t=\tv\te>""", output) + self._run_check("<a foo==bar>", [('starttag', 'a', [('foo', '=bar')])]) + self._run_check("<a foo =bar>", [('starttag', 'a', [('foo', None), ('=bar', None)])]) + self._run_check("<a foo\t=bar>", [('starttag', 'a', [('foo', None), ('=bar', None)])]) + self._run_check("<a foo\v=bar>", [('starttag', 'a', [('foo\v', 'bar')])]) + self._run_check("<a foo\xa0=bar>", [('starttag', 'a', [('foo\xa0', 'bar')])]) + self._run_check("<a foo= bar>", [('starttag', 'a', [('foo', ''), ('bar', None)])]) + self._run_check("<a foo=\tbar>", [('starttag', 'a', [('foo', ''), ('bar', None)])]) + self._run_check("<a foo=\vbar>", [('starttag', 'a', [('foo', '\vbar')])]) + self._run_check("<a foo=\xa0bar>", [('starttag', 'a', [('foo', '\xa0bar')])]) def test_attr_values(self): self._run_check("""<a b='xxx\n\txxx' c="yyy\t\nyyy" d='\txyz\n'>""", @@ -760,6 +829,10 @@ class AttributesTestCase(TestCaseBase): ("d", "\txyz\n")])]) self._run_check("""<a b='' c="">""", [("starttag", "a", [("b", ""), ("c", "")])]) + self._run_check("<a b=\t c=\n>", + [("starttag", "a", [("b", ""), ("c", "")])]) + self._run_check("<a b=\v c=\xa0>", + [("starttag", "a", [("b", "\v"), ("c", "\xa0")])]) # Regression test for SF patch #669683. self._run_check("<e a=rgb(1,2,3)>", [("starttag", "e", [("a", "rgb(1,2,3)")])]) @@ -831,7 +904,7 @@ class AttributesTestCase(TestCaseBase): ('data', 'test - bad2'), ('endtag', 'a'), ('starttag', 'a', [('href', "test'\xa0style='color:red;bad3'")]), ('data', 'test - bad3'), ('endtag', 'a'), - ('starttag', 'a', [('href', "test'\xa0style='color:red;bad4'")]), + ('starttag', 'a', [('href', None), ('=', None), ("test' style", 'color:red;bad4')]), ('data', 'test - bad4'), ('endtag', 'a') ] self._run_check(html, expected) diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 0c921ffbc25..b487bcabf01 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -9,6 +9,7 @@ # * test_univnewlines - tests universal newline support # * test_largefile - tests operations on a file greater than 2**32 bytes # (only enabled with -ulargefile) +# * test_free_threading/test_io - tests thread safety of io objects ################################################################################ # ATTENTION TEST WRITERS!!! diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py index fc26e71ffcb..fccdcc975e6 100644 --- a/Lib/test/test_types.py +++ b/Lib/test/test_types.py @@ -21,6 +21,7 @@ import types import unittest.mock import weakref import typing +import re c_types = import_fresh_module('types', fresh=['_types']) py_types = import_fresh_module('types', blocked=['_types']) @@ -1375,6 +1376,27 @@ class MappingProxyTests(unittest.TestCase): view = self.mappingproxy(mapping) self.assertEqual(hash(view), hash(mapping)) + def test_richcompare(self): + mp1 = self.mappingproxy({'a': 1}) + mp1_2 = self.mappingproxy({'a': 1}) + mp2 = self.mappingproxy({'a': 2}) + + self.assertTrue(mp1 == mp1_2) + self.assertFalse(mp1 != mp1_2) + self.assertFalse(mp1 == mp2) + self.assertTrue(mp1 != mp2) + + msg = "not supported between instances of 'mappingproxy' and 'mappingproxy'" + + with self.assertRaisesRegex(TypeError, msg): + mp1 > mp2 + with self.assertRaisesRegex(TypeError, msg): + mp1 < mp1_2 + with self.assertRaisesRegex(TypeError, msg): + mp2 >= mp2 + with self.assertRaisesRegex(TypeError, msg): + mp1_2 <= mp1 + class ClassCreationTests(unittest.TestCase): @@ -2009,6 +2031,24 @@ class SimpleNamespaceTests(unittest.TestCase): self.assertEqual(ns1, ns2) self.assertNotEqual(ns2, types.SimpleNamespace()) + def test_richcompare_unsupported(self): + ns1 = types.SimpleNamespace(x=1) + ns2 = types.SimpleNamespace(y=2) + + msg = re.escape( + "not supported between instances of " + "'types.SimpleNamespace' and 'types.SimpleNamespace'" + ) + + with self.assertRaisesRegex(TypeError, msg): + ns1 > ns2 + with self.assertRaisesRegex(TypeError, msg): + ns1 >= ns2 + with self.assertRaisesRegex(TypeError, msg): + ns1 < ns2 + with self.assertRaisesRegex(TypeError, msg): + ns1 <= ns2 + def test_nested(self): ns1 = types.SimpleNamespace(a=1, b=2) ns2 = types.SimpleNamespace() diff --git a/Lib/test/test_unittest/test_case.py b/Lib/test/test_unittest/test_case.py index d66cab146af..cf10e956bf2 100644 --- a/Lib/test/test_unittest/test_case.py +++ b/Lib/test/test_unittest/test_case.py @@ -1920,6 +1920,22 @@ test case with self.assertLogs(): raise ZeroDivisionError("Unexpected") + def testAssertLogsWithFormatter(self): + # Check alternative formats will be respected + format = "[No.1: the larch] %(levelname)s:%(name)s:%(message)s" + formatter = logging.Formatter(format) + with self.assertNoStderr(): + with self.assertLogs() as cm: + log_foo.info("1") + log_foobar.debug("2") + self.assertEqual(cm.output, ["INFO:foo:1"]) + self.assertLogRecords(cm.records, [{'name': 'foo'}]) + with self.assertLogs(formatter=formatter) as cm: + log_foo.info("1") + log_foobar.debug("2") + self.assertEqual(cm.output, ["[No.1: the larch] INFO:foo:1"]) + self.assertLogRecords(cm.records, [{'name': 'foo'}]) + def testAssertNoLogsDefault(self): with self.assertRaises(self.failureException) as cm: with self.assertNoLogs(): diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 38be2cd437f..bf6d5074fde 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -218,6 +218,33 @@ class ElementTreeTest(unittest.TestCase): def serialize_check(self, elem, expected): self.assertEqual(serialize(elem), expected) + def test_constructor(self): + # Test constructor behavior. + + with self.assertRaises(TypeError): + tree = ET.ElementTree("") + with self.assertRaises(TypeError): + tree = ET.ElementTree(ET.ElementTree()) + + def test_setroot(self): + # Test _setroot behavior. + + tree = ET.ElementTree() + element = ET.Element("tag") + tree._setroot(element) + self.assertEqual(tree.getroot().tag, "tag") + self.assertEqual(tree.getroot(), element) + + # Test behavior with an invalid root element + + tree = ET.ElementTree() + with self.assertRaises(TypeError): + tree._setroot("") + with self.assertRaises(TypeError): + tree._setroot(ET.ElementTree()) + with self.assertRaises(TypeError): + tree._setroot(None) + def test_interface(self): # Test element tree interface. diff --git a/Lib/unittest/_log.py b/Lib/unittest/_log.py index 94868e5bb95..3d69385ea24 100644 --- a/Lib/unittest/_log.py +++ b/Lib/unittest/_log.py @@ -30,7 +30,7 @@ class _AssertLogsContext(_BaseTestCaseContext): LOGGING_FORMAT = "%(levelname)s:%(name)s:%(message)s" - def __init__(self, test_case, logger_name, level, no_logs): + def __init__(self, test_case, logger_name, level, no_logs, formatter=None): _BaseTestCaseContext.__init__(self, test_case) self.logger_name = logger_name if level: @@ -39,13 +39,14 @@ class _AssertLogsContext(_BaseTestCaseContext): self.level = logging.INFO self.msg = None self.no_logs = no_logs + self.formatter = formatter def __enter__(self): if isinstance(self.logger_name, logging.Logger): logger = self.logger = self.logger_name else: logger = self.logger = logging.getLogger(self.logger_name) - formatter = logging.Formatter(self.LOGGING_FORMAT) + formatter = self.formatter or logging.Formatter(self.LOGGING_FORMAT) handler = _CapturingHandler() handler.setLevel(self.level) handler.setFormatter(formatter) diff --git a/Lib/unittest/case.py b/Lib/unittest/case.py index db10de68e4a..eba50839cd3 100644 --- a/Lib/unittest/case.py +++ b/Lib/unittest/case.py @@ -849,7 +849,7 @@ class TestCase(object): context = _AssertNotWarnsContext(expected_warning, self) return context.handle('_assertNotWarns', args, kwargs) - def assertLogs(self, logger=None, level=None): + def assertLogs(self, logger=None, level=None, formatter=None): """Fail unless a log message of level *level* or higher is emitted on *logger_name* or its children. If omitted, *level* defaults to INFO and *logger* defaults to the root logger. @@ -861,6 +861,8 @@ class TestCase(object): `records` attribute will be a list of the corresponding LogRecord objects. + Optionally supply `formatter` to control how messages are formatted. + Example:: with self.assertLogs('foo', level='INFO') as cm: @@ -871,7 +873,7 @@ class TestCase(object): """ # Lazy import to avoid importing logging if it is not needed. from ._log import _AssertLogsContext - return _AssertLogsContext(self, logger, level, no_logs=False) + return _AssertLogsContext(self, logger, level, no_logs=False, formatter=formatter) def assertNoLogs(self, logger=None, level=None): """ Fail unless no log messages of level *level* or higher are emitted diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index 44ab5d18624..dafe5b1b8a0 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -527,7 +527,9 @@ class ElementTree: """ def __init__(self, element=None, file=None): - # assert element is None or iselement(element) + if element is not None and not iselement(element): + raise TypeError('expected an Element, not %s' % + type(element).__name__) self._root = element # first node if file: self.parse(file) @@ -543,7 +545,9 @@ class ElementTree: with the given element. Use with care! """ - # assert iselement(element) + if not iselement(element): + raise TypeError('expected an Element, not %s' + % type(element).__name__) self._root = element def parse(self, source, parser=None): @@ -709,6 +713,8 @@ class ElementTree: of start/end tags """ + if self._root is None: + raise TypeError('ElementTree not initialized') if not method: method = "xml" elif method not in _serialize: |