summary | refs | log | tree | commit | diff | stats | homepage
diff options
context:
space:
mode:
author Damien George <damien.p.george@gmail.com> 2014-03-21 23:32:59 +0000
committer Damien George <damien.p.george@gmail.com> 2014-03-21 23:32:59 +0000
commit b32db4e1ad381edaaf91320cb2830e4a1b151863 (patch)
tree c31e254cef1b73aa19bba253ba78ec5375b3b656
parent 8913c04831c94d2bcb82b0447ab7ccf6b2e346a6 (diff)
parent 093b8a5fa62b4403d56d4323fe1a088dbe9f7efe (diff)
download micropython-b32db4e1ad381edaaf91320cb2830e4a1b151863.tar.gz
micropython-b32db4e1ad381edaaf91320cb2830e4a1b151863.zip
Merge branch 'master' of github.com:micropython/micropython
-rw-r--r-- py/builtin.c 22
-rw-r--r-- py/objstr.c 136
-rw-r--r-- py/runtime.c 4
-rw-r--r-- tests/basics/bytes.py 28
-rw-r--r-- tests/basics/int-long.py 7
5 files changed, 164 insertions, 33 deletions
diff --git a/py/builtin.c b/py/builtin.c
index 2e0627fa5f..93e91072c4 100644
--- a/py/builtin.c
+++ b/py/builtin.c
@@ -375,28 +375,6 @@ STATIC mp_obj_t mp_builtin_sorted(uint n_args, const mp_obj_t *args, mp_map_t *k
MP_DEFINE_CONST_FUN_OBJ_KW(mp_builtin_sorted_obj, 1, mp_builtin_sorted);
-STATIC mp_obj_t mp_builtin_str(mp_obj_t o_in) {
- vstr_t *vstr = vstr_new();
- mp_obj_print_helper((void (*)(void*, const char*, ...))vstr_printf, vstr, o_in, PRINT_STR);
- mp_obj_t s = mp_obj_new_str((byte*)vstr->buf, vstr->len, false);
- vstr_free(vstr);
- return s;
-}
-
-MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_str_obj, mp_builtin_str);
-
-// TODO: This should be type, this is just quick CPython compat hack
-STATIC mp_obj_t mp_builtin_bytes(uint n_args, const mp_obj_t *args) {
- if (!MP_OBJ_IS_QSTR(args[0]) && !MP_OBJ_IS_TYPE(args[0], &str_type)) {
- assert(0);
- }
- // Currently, MicroPython strings are mix between CPython byte and unicode
- // strings. So, conversion is null so far.
- return args[0];
-}
-
-MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mp_builtin_bytes_obj, 1, 3, mp_builtin_bytes);
-
STATIC mp_obj_t mp_builtin_id(mp_obj_t o_in) {
return mp_obj_new_int((machine_int_t)o_in);
}
diff --git a/py/objstr.c b/py/objstr.c
index 77cefa82bc..35a948700c 100644
--- a/py/objstr.c
+++ b/py/objstr.c
@@ -14,9 +14,11 @@ typedef struct _mp_obj_str_t {
mp_obj_base_t base;
machine_uint_t hash : 16; // XXX here we assume the hash size is 16 bits (it is at the moment; see qstr.c)
machine_uint_t len : 16; // len == number of bytes used in data, alloc = len + 1 because (at the moment) we also append a null byte
- byte data[];
+ const byte *data;
} mp_obj_str_t;
+const mp_obj_t mp_const_empty_bytes;
+
// use this macro to extract the string hash
#define GET_STR_HASH(str_obj_in, str_hash) uint str_hash; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_hash = qstr_hash(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_hash = ((mp_obj_str_t*)str_obj_in)->hash; }
@@ -28,6 +30,7 @@ typedef struct _mp_obj_str_t {
STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str);
STATIC mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str);
+STATIC mp_obj_t str_new(const mp_obj_type_t *type, const byte* data, uint len);
/******************************************************************************/
/* str */
@@ -78,6 +81,109 @@ STATIC void str_print(void (*print)(void *env, const char *fmt, ...), void *env,
}
}
+STATIC mp_obj_t str_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) {
+ switch (n_args) {
+ case 0:
+ return MP_OBJ_NEW_QSTR(MP_QSTR_);
+
+ case 1:
+ {
+ vstr_t *vstr = vstr_new();
+ mp_obj_print_helper((void (*)(void*, const char*, ...))vstr_printf, vstr, args[0], PRINT_STR);
+ mp_obj_t s = mp_obj_new_str((byte*)vstr->buf, vstr->len, false);
+ vstr_free(vstr);
+ return s;
+ }
+
+ case 2:
+ case 3:
+ {
+ // TODO: validate 2nd/3rd args
+ if (!MP_OBJ_IS_TYPE(args[0], &bytes_type)) {
+ nlr_jump(mp_obj_new_exception_msg(&mp_type_TypeError, "bytes expected"));
+ }
+ GET_STR_DATA_LEN(args[0], str_data, str_len);
+ GET_STR_HASH(args[0], str_hash);
+ mp_obj_str_t *o = str_new(&str_type, NULL, str_len);
+ o->data = str_data;
+ o->hash = str_hash;
+ return o;
+ }
+
+ default:
+ nlr_jump(mp_obj_new_exception_msg(&mp_type_TypeError, "str takes at most 3 arguments"));
+ }
+}
+
+STATIC mp_obj_t bytes_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) {
+ if (n_args == 0) {
+ return mp_const_empty_bytes;
+ }
+
+ if (MP_OBJ_IS_STR(args[0])) {
+ if (n_args < 2 || n_args > 3) {
+ goto wrong_args;
+ }
+ GET_STR_DATA_LEN(args[0], str_data, str_len);
+ GET_STR_HASH(args[0], str_hash);
+ mp_obj_str_t *o = str_new(&bytes_type, NULL, str_len);
+ o->data = str_data;
+ o->hash = str_hash;
+ return o;
+ }
+
+ if (n_args > 1) {
+ goto wrong_args;
+ }
+
+ if (MP_OBJ_IS_SMALL_INT(args[0])) {
+ uint len = MP_OBJ_SMALL_INT_VALUE(args[0]);
+ byte *data;
+
+ mp_obj_t o = mp_obj_str_builder_start(&bytes_type, len, &data);
+ memset(data, 0, len);
+ return mp_obj_str_builder_end(o);
+ }
+
+ int len;
+ byte *data;
+ vstr_t *vstr = NULL;
+ mp_obj_t o = NULL;
+ // Try to create array of exact len if initializer len is known
+ mp_obj_t len_in = mp_obj_len_maybe(args[0]);
+ if (len_in == MP_OBJ_NULL) {
+ len = -1;
+ vstr = vstr_new();
+ } else {
+ len = MP_OBJ_SMALL_INT_VALUE(len_in);
+ o = mp_obj_str_builder_start(&bytes_type, len, &data);
+ }
+
+ mp_obj_t iterable = rt_getiter(args[0]);
+ mp_obj_t item;
+ while ((item = rt_iternext(iterable)) != mp_const_stop_iteration) {
+ if (len == -1) {
+ vstr_add_char(vstr, MP_OBJ_SMALL_INT_VALUE(item));
+ } else {
+ *data++ = MP_OBJ_SMALL_INT_VALUE(item);
+ }
+ }
+
+ if (len == -1) {
+ vstr_shrink(vstr);
+ // TODO: Optimize, borrow buffer from vstr
+ len = vstr_len(vstr);
+ o = mp_obj_str_builder_start(&bytes_type, len, &data);
+ memcpy(data, vstr_str(vstr), len);
+ vstr_free(vstr);
+ }
+
+ return mp_obj_str_builder_end(o);
+
+wrong_args:
+ nlr_jump(mp_obj_new_exception_msg(&mp_type_TypeError, "wrong number of arguments"));
+}
+
// like strstr but with specified length and allows \0 bytes
// TODO replace with something more efficient/standard
STATIC const byte *find_subbytes(const byte *haystack, uint hlen, const byte *needle, uint nlen) {
@@ -619,6 +725,7 @@ const mp_obj_type_t str_type = {
{ &mp_type_type },
.name = MP_QSTR_str,
.print = str_print,
+ .make_new = str_make_new,
.binary_op = str_binary_op,
.getiter = mp_obj_new_str_iterator,
.methods = str_type_methods,
@@ -630,34 +737,45 @@ const mp_obj_type_t bytes_type = {
{ &mp_type_type },
.name = MP_QSTR_bytes,
.print = str_print,
+ .make_new = bytes_make_new,
.binary_op = str_binary_op,
.getiter = mp_obj_new_bytes_iterator,
.methods = str_type_methods,
};
+// the zero-length bytes
+STATIC const mp_obj_str_t empty_bytes_obj = {{&bytes_type}, 0, 0, NULL};
+const mp_obj_t mp_const_empty_bytes = (mp_obj_t)&empty_bytes_obj;
+
mp_obj_t mp_obj_str_builder_start(const mp_obj_type_t *type, uint len, byte **data) {
- mp_obj_str_t *o = m_new_obj_var(mp_obj_str_t, byte, len + 1);
+ mp_obj_str_t *o = m_new_obj(mp_obj_str_t);
o->base.type = type;
o->len = len;
- *data = o->data;
+ byte *p = m_new(byte, len + 1);
+ o->data = p;
+ *data = p;
return o;
}
mp_obj_t mp_obj_str_builder_end(mp_obj_t o_in) {
- assert(MP_OBJ_IS_STR(o_in));
mp_obj_str_t *o = o_in;
o->hash = qstr_compute_hash(o->data, o->len);
- o->data[o->len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings
+ byte *p = (byte*)o->data;
+ p[o->len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings
return o;
}
STATIC mp_obj_t str_new(const mp_obj_type_t *type, const byte* data, uint len) {
- mp_obj_str_t *o = m_new_obj_var(mp_obj_str_t, byte, len + 1);
+ mp_obj_str_t *o = m_new_obj(mp_obj_str_t);
o->base.type = type;
- o->hash = qstr_compute_hash(data, len);
o->len = len;
- memcpy(o->data, data, len * sizeof(byte));
- o->data[len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings
+ if (data) {
+ o->hash = qstr_compute_hash(data, len);
+ byte *p = m_new(byte, len + 1);
+ o->data = p;
+ memcpy(p, data, len * sizeof(byte));
+ p[len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings
+ }
return o;
}
diff --git a/py/runtime.c b/py/runtime.c
index c268fd5464..4bcb91c547 100644
--- a/py/runtime.c
+++ b/py/runtime.c
@@ -89,6 +89,7 @@ STATIC const mp_builtin_elem_t builtin_table[] = {
// built-in types
{ MP_QSTR_bool, (mp_obj_t)&bool_type },
+ { MP_QSTR_bytes, (mp_obj_t)&bytes_type },
#if MICROPY_ENABLE_FLOAT
{ MP_QSTR_complex, (mp_obj_t)&mp_type_complex },
#endif
@@ -102,6 +103,7 @@ STATIC const mp_builtin_elem_t builtin_table[] = {
{ MP_QSTR_list, (mp_obj_t)&list_type },
{ MP_QSTR_map, (mp_obj_t)&map_type },
{ MP_QSTR_set, (mp_obj_t)&set_type },
+ { MP_QSTR_str, (mp_obj_t)&str_type },
{ MP_QSTR_super, (mp_obj_t)&super_type },
{ MP_QSTR_tuple, (mp_obj_t)&tuple_type },
{ MP_QSTR_type, (mp_obj_t)&mp_type_type },
@@ -114,7 +116,6 @@ STATIC const mp_builtin_elem_t builtin_table[] = {
{ MP_QSTR_abs, (mp_obj_t)&mp_builtin_abs_obj },
{ MP_QSTR_all, (mp_obj_t)&mp_builtin_all_obj },
{ MP_QSTR_any, (mp_obj_t)&mp_builtin_any_obj },
- { MP_QSTR_bytes, (mp_obj_t)&mp_builtin_bytes_obj },
{ MP_QSTR_callable, (mp_obj_t)&mp_builtin_callable_obj },
{ MP_QSTR_chr, (mp_obj_t)&mp_builtin_chr_obj },
{ MP_QSTR_dir, (mp_obj_t)&mp_builtin_dir_obj },
@@ -137,7 +138,6 @@ STATIC const mp_builtin_elem_t builtin_table[] = {
{ MP_QSTR_repr, (mp_obj_t)&mp_builtin_repr_obj },
{ MP_QSTR_sorted, (mp_obj_t)&mp_builtin_sorted_obj },
{ MP_QSTR_sum, (mp_obj_t)&mp_builtin_sum_obj },
- { MP_QSTR_str, (mp_obj_t)&mp_builtin_str_obj },
{ MP_QSTR_bytearray, (mp_obj_t)&mp_builtin_bytearray_obj },
// built-in exceptions
diff --git a/tests/basics/bytes.py b/tests/basics/bytes.py
index 7d0cf22d44..a084bc3994 100644
--- a/tests/basics/bytes.py
+++ b/tests/basics/bytes.py
@@ -4,8 +4,36 @@ print(str(a))
print(repr(a))
print(a[0], a[2])
print(a[-1])
+print(str(a, "utf-8"))
+print(str(a, "utf-8", "ignore"))
+try:
+ str(a, "utf-8", "ignore", "toomuch")
+except TypeError:
+ print("TypeError")
s = 0
for i in a:
s += i
print(s)
+
+
+print(bytes("abc", "utf-8"))
+print(bytes("abc", "utf-8", "replace"))
+try:
+ bytes("abc")
+except TypeError:
+ print("TypeError")
+try:
+ bytes("abc", "utf-8", "replace", "toomuch")
+except TypeError:
+ print("TypeError")
+
+print(bytes(3))
+
+print(bytes([3, 2, 1]))
+print(bytes(range(5)))
+
+def gen():
+ for i in range(4):
+ yield i
+print(bytes(gen()))
diff --git a/tests/basics/int-long.py b/tests/basics/int-long.py
index f867d8037d..3567e08b2d 100644
--- a/tests/basics/int-long.py
+++ b/tests/basics/int-long.py
@@ -37,3 +37,10 @@ a <<= 5
print(a)
a >>= 1
print(a)
+
+# Test referential integrity of long ints
+a = 0x1ffffffff
+b = a
+a += 1
+print(a)
+print(b)