diff options
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/_pyio.py | 80 | ||||
-rw-r--r-- | Lib/html/parser.py | 18 | ||||
-rw-r--r-- | Lib/test/test_free_threading/test_io.py | 42 | ||||
-rw-r--r-- | Lib/test/test_getpath.py | 21 | ||||
-rw-r--r-- | Lib/test/test_htmlparser.py | 32 | ||||
-rw-r--r-- | Lib/test/test_io.py | 1 |
6 files changed, 144 insertions, 50 deletions
diff --git a/Lib/_pyio.py b/Lib/_pyio.py index fb2a6d049ca..5db8ce9244b 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -876,16 +876,28 @@ class BytesIO(BufferedIOBase): _buffer = None def __init__(self, initial_bytes=None): + # Use to keep self._buffer and self._pos consistent. + self._lock = Lock() + buf = bytearray() if initial_bytes is not None: buf += initial_bytes - self._buffer = buf - self._pos = 0 + + with self._lock: + self._buffer = buf + self._pos = 0 def __getstate__(self): if self.closed: raise ValueError("__getstate__ on closed file") - return self.__dict__.copy() + with self._lock: + state = self.__dict__.copy() + del state['_lock'] + return state + + def __setstate__(self, state): + self.__dict__.update(state) + self._lock = Lock() def getvalue(self): """Return the bytes value (contents) of the buffer @@ -918,14 +930,16 @@ class BytesIO(BufferedIOBase): raise TypeError(f"{size!r} is not an integer") else: size = size_index() - if size < 0: - size = len(self._buffer) - if len(self._buffer) <= self._pos: - return b"" - newpos = min(len(self._buffer), self._pos + size) - b = self._buffer[self._pos : newpos] - self._pos = newpos - return bytes(b) + + with self._lock: + if size < 0: + size = len(self._buffer) + if len(self._buffer) <= self._pos: + return b"" + newpos = min(len(self._buffer), self._pos + size) + b = self._buffer[self._pos : newpos] + self._pos = newpos + return bytes(b) def read1(self, size=-1): """This is the same as read. @@ -941,12 +955,14 @@ class BytesIO(BufferedIOBase): n = view.nbytes # Size of any bytes-like object if n == 0: return 0 - pos = self._pos - if pos > len(self._buffer): - # Pad buffer to pos with null bytes. - self._buffer.resize(pos) - self._buffer[pos:pos + n] = b - self._pos += n + + with self._lock: + pos = self._pos + if pos > len(self._buffer): + # Pad buffer to pos with null bytes. + self._buffer.resize(pos) + self._buffer[pos:pos + n] = b + self._pos += n return n def seek(self, pos, whence=0): @@ -963,9 +979,11 @@ class BytesIO(BufferedIOBase): raise ValueError("negative seek position %r" % (pos,)) self._pos = pos elif whence == 1: - self._pos = max(0, self._pos + pos) + with self._lock: + self._pos = max(0, self._pos + pos) elif whence == 2: - self._pos = max(0, len(self._buffer) + pos) + with self._lock: + self._pos = max(0, len(self._buffer) + pos) else: raise ValueError("unsupported whence value") return self._pos @@ -978,18 +996,20 @@ class BytesIO(BufferedIOBase): def truncate(self, pos=None): if self.closed: raise ValueError("truncate on closed file") - if pos is None: - pos = self._pos - else: - try: - pos_index = pos.__index__ - except AttributeError: - raise TypeError(f"{pos!r} is not an integer") + + with self._lock: + if pos is None: + pos = self._pos else: - pos = pos_index() - if pos < 0: - raise ValueError("negative truncate position %r" % (pos,)) - del self._buffer[pos:] + try: + pos_index = pos.__index__ + except AttributeError: + raise TypeError(f"{pos!r} is not an integer") + else: + pos = pos_index() + if pos < 0: + raise ValueError("negative truncate position %r" % (pos,)) + del self._buffer[pos:] return pos def readable(self): diff --git a/Lib/html/parser.py b/Lib/html/parser.py index cc15de07b5b..9b4f0959913 100644 --- a/Lib/html/parser.py +++ b/Lib/html/parser.py @@ -29,7 +29,8 @@ attr_charref = re.compile(r'&(#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*)[;= starttagopen = re.compile('<[a-zA-Z]') endtagopen = re.compile('</[a-zA-Z]') piclose = re.compile('>') -commentclose = re.compile(r'--\s*>') +commentclose = re.compile(r'--!?>') +commentabruptclose = re.compile(r'-?>') # Note: # 1) if you change tagfind/attrfind remember to update locatetagend too; # 2) if you change tagfind/attrfind and/or locatetagend the parser will @@ -336,6 +337,21 @@ class HTMLParser(_markupbase.ParserBase): else: return self.parse_bogus_comment(i) + # Internal -- parse comment, return length or -1 if not terminated + # see https://html.spec.whatwg.org/multipage/parsing.html#comment-start-state + def parse_comment(self, i, report=True): + rawdata = self.rawdata + assert rawdata.startswith('<!--', i), 'unexpected call to parse_comment()' + match = commentclose.search(rawdata, i+4) + if not match: + match = commentabruptclose.match(rawdata, i+4) + if not match: + return -1 + if report: + j = match.start() + self.handle_comment(rawdata[i+4: j]) + return match.end() + # Internal -- parse bogus comment, return length or -1 if not terminated # see https://html.spec.whatwg.org/multipage/parsing.html#bogus-comment-state def parse_bogus_comment(self, i, report=1): diff --git a/Lib/test/test_free_threading/test_io.py b/Lib/test/test_free_threading/test_io.py index f9bec740ddf..41d89e04da8 100644 --- a/Lib/test/test_free_threading/test_io.py +++ b/Lib/test/test_free_threading/test_io.py @@ -1,12 +1,13 @@ +import io +import _pyio as pyio import threading from unittest import TestCase from test.support import threading_helper from random import randint -from io import BytesIO from sys import getsizeof -class TestBytesIO(TestCase): +class ThreadSafetyMixin: # Test pretty much everything that can break under free-threading. # Non-deterministic, but at least one of these things will fail if # BytesIO object is not free-thread safe. @@ -90,20 +91,27 @@ class TestBytesIO(TestCase): barrier.wait() getsizeof(b) - self.check([write] * 10, BytesIO()) - self.check([writelines] * 10, BytesIO()) - self.check([write] * 10 + [truncate] * 10, BytesIO()) - self.check([truncate] + [read] * 10, BytesIO(b'0\n'*204800)) - self.check([truncate] + [read1] * 10, BytesIO(b'0\n'*204800)) - self.check([truncate] + [readline] * 10, BytesIO(b'0\n'*20480)) - self.check([truncate] + [readlines] * 10, BytesIO(b'0\n'*20480)) - self.check([truncate] + [readinto] * 10, BytesIO(b'0\n'*204800), bytearray(b'0\n'*204800)) - self.check([close] + [write] * 10, BytesIO()) - self.check([truncate] + [getvalue] * 10, BytesIO(b'0\n'*204800)) - self.check([truncate] + [getbuffer] * 10, BytesIO(b'0\n'*204800)) - self.check([truncate] + [iter] * 10, BytesIO(b'0\n'*20480)) - self.check([truncate] + [getstate] * 10, BytesIO(b'0\n'*204800)) - self.check([truncate] + [setstate] * 10, BytesIO(b'0\n'*204800), (b'123', 0, None)) - self.check([truncate] + [sizeof] * 10, BytesIO(b'0\n'*204800)) + self.check([write] * 10, self.ioclass()) + self.check([writelines] * 10, self.ioclass()) + self.check([write] * 10 + [truncate] * 10, self.ioclass()) + self.check([truncate] + [read] * 10, self.ioclass(b'0\n'*204800)) + self.check([truncate] + [read1] * 10, self.ioclass(b'0\n'*204800)) + self.check([truncate] + [readline] * 10, self.ioclass(b'0\n'*20480)) + self.check([truncate] + [readlines] * 10, self.ioclass(b'0\n'*20480)) + self.check([truncate] + [readinto] * 10, self.ioclass(b'0\n'*204800), bytearray(b'0\n'*204800)) + self.check([close] + [write] * 10, self.ioclass()) + self.check([truncate] + [getvalue] * 10, self.ioclass(b'0\n'*204800)) + self.check([truncate] + [getbuffer] * 10, self.ioclass(b'0\n'*204800)) + self.check([truncate] + [iter] * 10, self.ioclass(b'0\n'*20480)) + self.check([truncate] + [getstate] * 10, self.ioclass(b'0\n'*204800)) + state = self.ioclass(b'123').__getstate__() + self.check([truncate] + [setstate] * 10, self.ioclass(b'0\n'*204800), state) + self.check([truncate] + [sizeof] * 10, self.ioclass(b'0\n'*204800)) # no tests for seek or tell because they don't break anything + +class CBytesIOTest(ThreadSafetyMixin, TestCase): + ioclass = io.BytesIO + +class PyBytesIOTest(ThreadSafetyMixin, TestCase): + ioclass = pyio.BytesIO diff --git a/Lib/test/test_getpath.py b/Lib/test/test_getpath.py index f86df9d0d03..83f09f34955 100644 --- a/Lib/test/test_getpath.py +++ b/Lib/test/test_getpath.py @@ -354,6 +354,27 @@ class MockGetPathTests(unittest.TestCase): actual = getpath(ns, expected) self.assertEqual(expected, actual) + def test_venv_posix_without_home_key(self): + ns = MockPosixNamespace( + argv0="/venv/bin/python3", + PREFIX="/usr", + ENV_PATH="/usr/bin", + ) + # Setup the bare minimum venv + ns.add_known_xfile("/usr/bin/python3") + ns.add_known_xfile("/venv/bin/python3") + ns.add_known_link("/venv/bin/python3", "/usr/bin/python3") + ns.add_known_file("/venv/pyvenv.cfg", [ + # home = key intentionally omitted + ]) + expected = dict( + executable="/venv/bin/python3", + prefix="/venv", + base_prefix="/usr", + ) + actual = getpath(ns, expected) + self.assertEqual(expected, actual) + def test_venv_changed_name_posix(self): "Test a venv layout on *nix." ns = MockPosixNamespace( diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index d0d2c54217c..15cad061889 100644 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -367,17 +367,45 @@ text html = ("<!-- I'm a valid comment -->" '<!--me too!-->' '<!------>' + '<!----->' '<!---->' + # abrupt-closing-of-empty-comment + '<!--->' + '<!-->' '<!----I have many hyphens---->' '<!-- I have a > in the middle -->' - '<!-- and I have -- in the middle! -->') + '<!-- and I have -- in the middle! -->' + '<!--incorrectly-closed-comment--!>' + '<!----!>' + '<!----!-->' + '<!---- >-->' + '<!---!>-->' + '<!--!>-->' + # nested-comment + '<!-- <!-- nested --> -->' + '<!--<!-->' + '<!--<!--!>' + ) expected = [('comment', " I'm a valid comment "), ('comment', 'me too!'), ('comment', '--'), + ('comment', '-'), + ('comment', ''), + ('comment', ''), ('comment', ''), ('comment', '--I have many hyphens--'), ('comment', ' I have a > in the middle '), - ('comment', ' and I have -- in the middle! ')] + ('comment', ' and I have -- in the middle! '), + ('comment', 'incorrectly-closed-comment'), + ('comment', ''), + ('comment', '--!'), + ('comment', '-- >'), + ('comment', '-!>'), + ('comment', '!>'), + ('comment', ' <!-- nested '), ('data', ' -->'), + ('comment', '<!'), + ('comment', '<!'), + ] self._run_check(html, expected) def test_condcoms(self): diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 0c921ffbc25..b487bcabf01 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -9,6 +9,7 @@ # * test_univnewlines - tests universal newline support # * test_largefile - tests operations on a file greater than 2**32 bytes # (only enabled with -ulargefile) +# * test_free_threading/test_io - tests thread safety of io objects ################################################################################ # ATTENTION TEST WRITERS!!! |