aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/Lib/test/test_codeccallbacks.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/test/test_codeccallbacks.py')
-rw-r--r--Lib/test/test_codeccallbacks.py39
1 files changed, 38 insertions, 1 deletions
diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py
index 86e5e5c1474..a767f67a02c 100644
--- a/Lib/test/test_codeccallbacks.py
+++ b/Lib/test/test_codeccallbacks.py
@@ -2,6 +2,7 @@ from _codecs import _unregister_error as _codecs_unregister_error
import codecs
import html.entities
import itertools
+import re
import sys
import unicodedata
import unittest
@@ -1125,7 +1126,7 @@ class CodecCallbackTest(unittest.TestCase):
text = 'abc<def>ghi'*n
text.translate(charmap)
- def test_mutatingdecodehandler(self):
+ def test_mutating_decode_handler(self):
baddata = [
("ascii", b"\xff"),
("utf-7", b"++"),
@@ -1160,6 +1161,42 @@ class CodecCallbackTest(unittest.TestCase):
for (encoding, data) in baddata:
self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242")
+ def test_mutating_decode_handler_unicode_escape(self):
+ decode = codecs.unicode_escape_decode
+ def mutating(exc):
+ if isinstance(exc, UnicodeDecodeError):
+ r = data.get(exc.object[:exc.end])
+ if r is not None:
+ exc.object = r[0] + exc.object[exc.end:]
+ return ('\u0404', r[1])
+ raise AssertionError("don't know how to handle %r" % exc)
+
+ codecs.register_error('test.mutating2', mutating)
+ data = {
+ br'\x0': (b'\\', 0),
+ br'\x3': (b'xxx\\', 3),
+ br'\x5': (b'x\\', 1),
+ }
+ def check(input, expected, msg):
+ with self.assertWarns(DeprecationWarning) as cm:
+ self.assertEqual(decode(input, 'test.mutating2'), (expected, len(input)))
+ self.assertIn(msg, str(cm.warning))
+
+ check(br'\x0n\z', '\u0404\n\\z', r'"\z" is an invalid escape sequence')
+ check(br'\x0n\501', '\u0404\n\u0141', r'"\501" is an invalid octal escape sequence')
+ check(br'\x0z', '\u0404\\z', r'"\z" is an invalid escape sequence')
+
+ check(br'\x3n\zr', '\u0404\n\\zr', r'"\z" is an invalid escape sequence')
+ check(br'\x3zr', '\u0404\\zr', r'"\z" is an invalid escape sequence')
+ check(br'\x3z5', '\u0404\\z5', r'"\z" is an invalid escape sequence')
+ check(memoryview(br'\x3z5x')[:-1], '\u0404\\z5', r'"\z" is an invalid escape sequence')
+ check(memoryview(br'\x3z5xy')[:-2], '\u0404\\z5', r'"\z" is an invalid escape sequence')
+
+ check(br'\x5n\z', '\u0404\n\\z', r'"\z" is an invalid escape sequence')
+ check(br'\x5n\501', '\u0404\n\u0141', r'"\501" is an invalid octal escape sequence')
+ check(br'\x5z', '\u0404\\z', r'"\z" is an invalid escape sequence')
+ check(memoryview(br'\x5zy')[:-1], '\u0404\\z', r'"\z" is an invalid escape sequence')
+
# issue32583
def test_crashing_decode_handler(self):
# better generating one more character to fill the extra space slot