summary | refs | log | tree | commit | diff | stats | homepage
diff options
context:
space:
mode:
author: Paul Sokolovsky <pfalcon@users.sourceforge.net> 2014-08-11 22:36:38 +0300
committer: Paul Sokolovsky <pfalcon@users.sourceforge.net> 2014-08-11 22:38:00 +0300
commit: 9749b2fb0d6c4ff65c7395e09028102c9508b34a (patch)
tree: b6aaa1bb52bd1ed7516ae4545771f0b797ae8057
parent: 6e6bcccdc19f35ccb09c064cb444eb498c307a33 (diff)
download: micropython-9749b2fb0d6c4ff65c7395e09028102c9508b34a.tar.gz
micropython-9749b2fb0d6c4ff65c7395e09028102c9508b34a.zip
objstr: Make sure that bytes are indexed as bytes, not as unicode.
Fixes #795.
-rw-r--r-- py/objstr.c 13
-rw-r--r-- tests/basics/bytes.py 5
2 files changed, 12 insertions, 6 deletions
diff --git a/py/objstr.c b/py/objstr.c
index 9d34609882..fb170f83c9 100644
--- a/py/objstr.c
+++ b/py/objstr.c
@@ -353,7 +353,8 @@ const byte *str_index_to_ptr(const mp_obj_type_t *type, const byte *self_data, u
}
#endif
-STATIC mp_obj_t str_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) {
+// This is used for both bytes and 8-bit strings. This is not used for unicode strings.
+STATIC mp_obj_t bytes_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) {
mp_obj_type_t *type = mp_obj_get_type(self_in);
GET_STR_DATA_LEN(self_in, self_data, self_len);
if (value == MP_OBJ_SENTINEL) {
@@ -368,11 +369,11 @@ STATIC mp_obj_t str_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) {
return mp_obj_new_str_of_type(type, self_data + slice.start, slice.stop - slice.start);
}
#endif
- const byte *p = str_index_to_ptr(type, self_data, self_len, index, false);
+ mp_uint_t index_val = mp_get_index(type, self_len, index, false);
if (type == &mp_type_bytes) {
- return MP_OBJ_NEW_SMALL_INT(*p);
+ return MP_OBJ_NEW_SMALL_INT(self_data[index_val]);
} else {
- return mp_obj_new_str((char*)p, 1, true);
+ return mp_obj_new_str((char*)&self_data[index_val], 1, true);
}
} else {
return MP_OBJ_NULL; // op not supported
@@ -1704,7 +1705,7 @@ const mp_obj_type_t mp_type_str = {
.print = str_print,
.make_new = str_make_new,
.binary_op = mp_obj_str_binary_op,
- .subscr = str_subscr,
+ .subscr = bytes_subscr,
.getiter = mp_obj_new_str_iterator,
.buffer_p = { .get_buffer = mp_obj_str_get_buffer },
.locals_dict = (mp_obj_t)&str_locals_dict,
@@ -1718,7 +1719,7 @@ const mp_obj_type_t mp_type_bytes = {
.print = str_print,
.make_new = bytes_make_new,
.binary_op = mp_obj_str_binary_op,
- .subscr = str_subscr,
+ .subscr = bytes_subscr,
.getiter = mp_obj_new_bytes_iterator,
.buffer_p = { .get_buffer = mp_obj_str_get_buffer },
.locals_dict = (mp_obj_t)&str_locals_dict,
diff --git a/tests/basics/bytes.py b/tests/basics/bytes.py
index a084bc3994..ce027e7d19 100644
--- a/tests/basics/bytes.py
+++ b/tests/basics/bytes.py
@@ -37,3 +37,8 @@ def gen():
for i in range(4):
yield i
print(bytes(gen()))
+
+# Make sure bytes are not mistreated as unicode
+x = b"\xff\x8e\xfe}\xfd\x7f"
+print(len(x))
+print(x[0], x[1], x[2], x[3])