summary refs log tree commit diff stats homepage
path: root/tests/unicode/unicode.py
diff options
context:
space:
mode:
author tll <1040424979@qq.com> 2017-06-24 08:38:32 +0800
committer Damien George <damien.p.george@gmail.com> 2017-09-06 16:43:09 +1000
commit 68c28174d0e0ec3f6b1461aea3a0b6a1b84610bb (patch)
tree 441a42ce59c5f965b66722bd6a5a5b24525c6bcf /tests/unicode/unicode.py
parent 069fc48bf60b31fca4339d26cee7b4a415b185f9 (diff)
download micropython-68c28174d0e0ec3f6b1461aea3a0b6a1b84610bb.tar.gz
micropython-68c28174d0e0ec3f6b1461aea3a0b6a1b84610bb.zip
py/objstr: Add check for valid UTF-8 when making a str from bytes.
This patch adds a function utf8_check() to check for a valid UTF-8 encoded string, and calls it when constructing a str from raw bytes. The feature is selectable at compile time via MICROPY_PY_BUILTINS_STR_UNICODE_CHECK and is enabled if unicode is enabled. It costs about 110 bytes on Thumb-2, 150 bytes on Xtensa and 170 bytes on x86-64.
Diffstat (limited to 'tests/unicode/unicode.py')
-rw-r--r-- tests/unicode/unicode.py 14
1 file changed, 14 insertions, 0 deletions
diff --git a/tests/unicode/unicode.py b/tests/unicode/unicode.py
index 5f29bc1c95..3a35ce8948 100644
--- a/tests/unicode/unicode.py
+++ b/tests/unicode/unicode.py
@@ -33,3 +33,17 @@ try:
int('\u0200')
except ValueError:
print('ValueError')
+
+# test invalid UTF-8 string
+try:
+ str(b'ab\xa1', 'utf8')
+except UnicodeError:
+ print('UnicodeError')
+try:
+ str(b'ab\xf8', 'utf8')
+except UnicodeError:
+ print('UnicodeError')
+try:
+ str(bytearray(b'ab\xc0a'), 'utf8')
+except UnicodeError:
+ print('UnicodeError')