diff options
Diffstat (limited to 'py/objstr.c')
-rw-r--r-- | py/objstr.c | 149 |
1 files changed, 78 insertions, 71 deletions
diff --git a/py/objstr.c b/py/objstr.c index f082e95591..70de0a693a 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -36,15 +36,15 @@ #include "py/runtime.h" #include "py/stackctrl.h" -STATIC mp_obj_t str_modulo_format(mp_obj_t pattern, mp_uint_t n_args, const mp_obj_t *args, mp_obj_t dict); +STATIC mp_obj_t str_modulo_format(mp_obj_t pattern, size_t n_args, const mp_obj_t *args, mp_obj_t dict); -STATIC mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str); +STATIC mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str, mp_obj_iter_buf_t *iter_buf); STATIC NORETURN void bad_implicit_conversion(mp_obj_t self_in); /******************************************************************************/ /* str */ -void mp_str_print_quoted(const mp_print_t *print, const byte *str_data, mp_uint_t str_len, bool is_bytes) { +void mp_str_print_quoted(const mp_print_t *print, const byte *str_data, size_t str_len, bool is_bytes) { // this escapes characters, but it will be very slow to print (calling print many times) bool has_single_quote = false; bool has_double_quote = false; @@ -231,11 +231,12 @@ STATIC mp_obj_t bytes_make_new(const mp_obj_type_t *type_in, size_t n_args, size vstr_init(&vstr, len); } - mp_obj_t iterable = mp_getiter(args[0]); + mp_obj_iter_buf_t iter_buf; + mp_obj_t iterable = mp_getiter(args[0], &iter_buf); mp_obj_t item; while ((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) { mp_int_t val = mp_obj_get_int(item); - #if MICROPY_CPYTHON_COMPAT + #if MICROPY_FULL_CHECKS if (val < 0 || val > 255) { mp_raise_ValueError("bytes value out of range"); } @@ -251,9 +252,9 @@ wrong_args: // like strstr but with specified length and allows \0 bytes // TODO replace with something more efficient/standard -const byte *find_subbytes(const byte *haystack, mp_uint_t hlen, const byte *needle, mp_uint_t nlen, mp_int_t direction) { +const byte *find_subbytes(const byte *haystack, size_t hlen, const byte *needle, size_t nlen, int direction) { if (hlen >= nlen) { - mp_uint_t str_index, str_index_end; + size_t str_index, str_index_end; if (direction > 0) { str_index = 0; str_index_end = hlen - nlen; @@ -282,19 +283,14 @@ const byte *find_subbytes(const byte *haystack, mp_uint_t hlen, const byte *need mp_obj_t mp_obj_str_binary_op(mp_uint_t op, mp_obj_t lhs_in, mp_obj_t rhs_in) { // check for modulo if (op == MP_BINARY_OP_MODULO) { - mp_obj_t *args; - mp_uint_t n_args; + mp_obj_t *args = &rhs_in; + size_t n_args = 1; mp_obj_t dict = MP_OBJ_NULL; if (MP_OBJ_IS_TYPE(rhs_in, &mp_type_tuple)) { // TODO: Support tuple subclasses? mp_obj_tuple_get(rhs_in, &n_args, &args); } else if (MP_OBJ_IS_TYPE(rhs_in, &mp_type_dict)) { - args = NULL; - n_args = 0; dict = rhs_in; - } else { - args = &rhs_in; - n_args = 1; } return str_modulo_format(lhs_in, n_args, args, dict); } @@ -338,7 +334,7 @@ mp_obj_t mp_obj_str_binary_op(mp_uint_t op, mp_obj_t lhs_in, mp_obj_t rhs_in) { // size and execution time so we don't. const byte *rhs_data; - mp_uint_t rhs_len; + size_t rhs_len; if (lhs_type == mp_obj_get_type(rhs_in)) { GET_STR_DATA_LEN(rhs_in, rhs_data_, rhs_len_); rhs_data = rhs_data_; @@ -358,6 +354,13 @@ mp_obj_t mp_obj_str_binary_op(mp_uint_t op, mp_obj_t lhs_in, mp_obj_t rhs_in) { switch (op) { case MP_BINARY_OP_ADD: case MP_BINARY_OP_INPLACE_ADD: { + if (lhs_len == 0 && mp_obj_get_type(rhs_in) == lhs_type) { + return rhs_in; + } + if (rhs_len == 0) { + return lhs_in; + } + vstr_t vstr; vstr_init_len(&vstr, lhs_len + rhs_len); memcpy(vstr.buf, lhs_data, lhs_len); @@ -385,7 +388,7 @@ mp_obj_t mp_obj_str_binary_op(mp_uint_t op, mp_obj_t lhs_in, mp_obj_t rhs_in) { // objstrunicode defines own version const byte *str_index_to_ptr(const mp_obj_type_t *type, const byte *self_data, size_t self_len, mp_obj_t index, bool is_slice) { - mp_uint_t index_val = mp_get_index(type, self_len, index, is_slice); + size_t index_val = mp_get_index(type, self_len, index, is_slice); return self_data + index_val; } #endif @@ -405,7 +408,7 @@ STATIC mp_obj_t bytes_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) { return mp_obj_new_str_of_type(type, self_data + slice.start, slice.stop - slice.start); } #endif - mp_uint_t index_val = mp_get_index(type, self_len, index, false); + size_t index_val = mp_get_index(type, self_len, index, false); // If we have unicode enabled the type will always be bytes, so take the short cut. if (MICROPY_PY_BUILTINS_STR_UNICODE || type == &mp_type_bytes) { return MP_OBJ_NEW_SMALL_INT(self_data[index_val]); @@ -425,22 +428,19 @@ STATIC mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) { GET_STR_DATA_LEN(self_in, sep_str, sep_len); // process args - mp_uint_t seq_len; + size_t seq_len; mp_obj_t *seq_items; - if (MP_OBJ_IS_TYPE(arg, &mp_type_tuple)) { - mp_obj_tuple_get(arg, &seq_len, &seq_items); - } else { - if (!MP_OBJ_IS_TYPE(arg, &mp_type_list)) { - // arg is not a list, try to convert it to one - // TODO: Try to optimize? - arg = mp_type_list.make_new(&mp_type_list, 1, 0, &arg); - } - mp_obj_list_get(arg, &seq_len, &seq_items); + + if (!MP_OBJ_IS_TYPE(arg, &mp_type_list) && !MP_OBJ_IS_TYPE(arg, &mp_type_tuple)) { + // arg is not a list nor a tuple, try to convert it to a list + // TODO: Try to optimize? + arg = mp_type_list.make_new(&mp_type_list, 1, 0, &arg); } + mp_obj_get_array(arg, &seq_len, &seq_items); // count required length - mp_uint_t required_len = 0; - for (mp_uint_t i = 0; i < seq_len; i++) { + size_t required_len = 0; + for (size_t i = 0; i < seq_len; i++) { if (mp_obj_get_type(seq_items[i]) != self_type) { mp_raise_TypeError( "join expects a list of str/bytes objects consistent with self object"); @@ -456,7 +456,7 @@ STATIC mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) { vstr_t vstr; vstr_init_len(&vstr, required_len); byte *data = (byte*)vstr.buf; - for (mp_uint_t i = 0; i < seq_len; i++) { + for (size_t i = 0; i < seq_len; i++) { if (i > 0) { memcpy(data, sep_str, sep_len); data += sep_len; @@ -513,7 +513,7 @@ mp_obj_t mp_obj_str_split(size_t n_args, const mp_obj_t *args) { bad_implicit_conversion(sep); } - mp_uint_t sep_len; + size_t sep_len; const char *sep_str = mp_obj_str_get_data(sep, &sep_len); if (sep_len == 0) { @@ -611,7 +611,7 @@ STATIC mp_obj_t str_rsplit(size_t n_args, const mp_obj_t *args) { if (sep == mp_const_none) { mp_not_implemented("rsplit(None,n)"); } else { - mp_uint_t sep_len; + size_t sep_len; const char *sep_str = mp_obj_str_get_data(sep, &sep_len); if (sep_len == 0) { @@ -642,7 +642,7 @@ STATIC mp_obj_t str_rsplit(size_t n_args, const mp_obj_t *args) { } if (idx != 0) { // We split less parts than split limit, now go cleanup surplus - mp_int_t used = org_splits + 1 - idx; + size_t used = org_splits + 1 - idx; memmove(res->items, &res->items[idx], used * sizeof(mp_obj_t)); mp_seq_clear(res->items, used, res->alloc, sizeof(*res->items)); res->len = used; @@ -652,7 +652,7 @@ STATIC mp_obj_t str_rsplit(size_t n_args, const mp_obj_t *args) { return MP_OBJ_FROM_PTR(res); } -STATIC mp_obj_t str_finder(mp_uint_t n_args, const mp_obj_t *args, mp_int_t direction, bool is_index) { +STATIC mp_obj_t str_finder(size_t n_args, const mp_obj_t *args, int direction, bool is_index) { const mp_obj_type_t *self_type = mp_obj_get_type(args[0]); mp_check_self(MP_OBJ_IS_STR_OR_BYTES(args[0])); @@ -738,7 +738,7 @@ STATIC mp_obj_t str_endswith(size_t n_args, const mp_obj_t *args) { enum { LSTRIP, RSTRIP, STRIP }; -STATIC mp_obj_t str_uni_strip(int type, mp_uint_t n_args, const mp_obj_t *args) { +STATIC mp_obj_t str_uni_strip(int type, size_t n_args, const mp_obj_t *args) { mp_check_self(MP_OBJ_IS_STR_OR_BYTES(args[0])); const mp_obj_type_t *self_type = mp_obj_get_type(args[0]); @@ -760,16 +760,16 @@ STATIC mp_obj_t str_uni_strip(int type, mp_uint_t n_args, const mp_obj_t *args) GET_STR_DATA_LEN(args[0], orig_str, orig_str_len); - mp_uint_t first_good_char_pos = 0; + size_t first_good_char_pos = 0; bool first_good_char_pos_set = false; - mp_uint_t last_good_char_pos = 0; - mp_uint_t i = 0; - mp_int_t delta = 1; + size_t last_good_char_pos = 0; + size_t i = 0; + int delta = 1; if (type == RSTRIP) { i = orig_str_len - 1; delta = -1; } - for (mp_uint_t len = orig_str_len; len > 0; len--) { + for (size_t len = orig_str_len; len > 0; len--) { if (find_subbytes(chars_to_del, chars_to_del_len, &orig_str[i], 1, 1) == NULL) { if (!first_good_char_pos_set) { first_good_char_pos_set = true; @@ -799,7 +799,7 @@ STATIC mp_obj_t str_uni_strip(int type, mp_uint_t n_args, const mp_obj_t *args) assert(last_good_char_pos >= first_good_char_pos); //+1 to accomodate the last character - mp_uint_t stripped_len = last_good_char_pos - first_good_char_pos + 1; + size_t stripped_len = last_good_char_pos - first_good_char_pos + 1; if (stripped_len == orig_str_len) { // If nothing was stripped, don't bother to dup original string // TODO: watch out for this case when we'll get to bytearray.strip() @@ -890,7 +890,7 @@ STATIC NORETURN void terse_str_format_value_error(void) { #define terse_str_format_value_error() #endif -STATIC vstr_t mp_obj_str_format_helper(const char *str, const char *top, int *arg_i, mp_uint_t n_args, const mp_obj_t *args, mp_map_t *kwargs) { +STATIC vstr_t mp_obj_str_format_helper(const char *str, const char *top, int *arg_i, size_t n_args, const mp_obj_t *args, mp_map_t *kwargs) { vstr_t vstr; mp_print_t print; vstr_init_print(&vstr, 16, &print); @@ -1306,12 +1306,12 @@ STATIC vstr_t mp_obj_str_format_helper(const char *str, const char *top, int *ar switch (type) { case '\0': // no explicit format type implies 's' case 's': { - mp_uint_t slen; + size_t slen; const char *s = mp_obj_str_get_data(arg, &slen); if (precision < 0) { precision = slen; } - if (slen > (mp_uint_t)precision) { + if (slen > (size_t)precision) { slen = precision; } mp_print_strn(&print, s, slen, flags, fill, width); @@ -1342,13 +1342,13 @@ mp_obj_t mp_obj_str_format(size_t n_args, const mp_obj_t *args, mp_map_t *kwargs return mp_obj_new_str_from_vstr(&mp_type_str, &vstr); } -STATIC mp_obj_t str_modulo_format(mp_obj_t pattern, mp_uint_t n_args, const mp_obj_t *args, mp_obj_t dict) { +STATIC mp_obj_t str_modulo_format(mp_obj_t pattern, size_t n_args, const mp_obj_t *args, mp_obj_t dict) { mp_check_self(MP_OBJ_IS_STR_OR_BYTES(pattern)); GET_STR_DATA_LEN(pattern, str, len); const byte *start_str = str; bool is_bytes = MP_OBJ_IS_TYPE(pattern, &mp_type_bytes); - int arg_i = 0; + size_t arg_i = 0; vstr_t vstr; mp_print_t print; vstr_init_print(&vstr, 16, &print); @@ -1369,6 +1369,10 @@ STATIC mp_obj_t str_modulo_format(mp_obj_t pattern, mp_uint_t n_args, const mp_o // Dictionary value lookup if (*str == '(') { + if (dict == MP_OBJ_NULL) { + mp_raise_TypeError("format requires a dict"); + } + arg_i = 1; // we used up the single dict argument const byte *key = ++str; while (*str != ')') { if (str >= top) { @@ -1403,7 +1407,7 @@ STATIC mp_obj_t str_modulo_format(mp_obj_t pattern, mp_uint_t n_args, const mp_o int width = 0; if (str < top) { if (*str == '*') { - if ((uint)arg_i >= n_args) { + if (arg_i >= n_args) { goto not_enough_args; } width = mp_obj_get_int(args[arg_i++]); @@ -1416,7 +1420,7 @@ STATIC mp_obj_t str_modulo_format(mp_obj_t pattern, mp_uint_t n_args, const mp_o if (str < top && *str == '.') { if (++str < top) { if (*str == '*') { - if ((uint)arg_i >= n_args) { + if (arg_i >= n_args) { goto not_enough_args; } prec = mp_obj_get_int(args[arg_i++]); @@ -1439,7 +1443,7 @@ incomplete_format: // Tuple value lookup if (arg == MP_OBJ_NULL) { - if ((uint)arg_i >= n_args) { + if (arg_i >= n_args) { not_enough_args: mp_raise_TypeError("not enough arguments for format string"); } @@ -1448,7 +1452,7 @@ not_enough_args: switch (*str) { case 'c': if (MP_OBJ_IS_STR(arg)) { - mp_uint_t slen; + size_t slen; const char *s = mp_obj_str_get_data(arg, &slen); if (slen != 1) { mp_raise_TypeError("%%c requires int or char"); @@ -1527,7 +1531,7 @@ not_enough_args: } } - if ((uint)arg_i != n_args) { + if (arg_i != n_args) { mp_raise_TypeError("not all arguments converted during string formatting"); } @@ -1582,11 +1586,11 @@ STATIC mp_obj_t str_replace(size_t n_args, const mp_obj_t *args) { // first pass computes the required length of the replaced string // second pass does the replacements for (;;) { - mp_uint_t replaced_str_index = 0; - mp_uint_t num_replacements_done = 0; + size_t replaced_str_index = 0; + size_t num_replacements_done = 0; const byte *old_occurrence; const byte *offset_ptr = str; - mp_uint_t str_len_remain = str_len; + size_t str_len_remain = str_len; if (old_len == 0) { // if old_str is empty, copy new_str to start of replaced string // copy the replacement string @@ -1596,7 +1600,7 @@ STATIC mp_obj_t str_replace(size_t n_args, const mp_obj_t *args) { replaced_str_index += new_len; num_replacements_done++; } - while (num_replacements_done != (mp_uint_t)max_rep && str_len_remain > 0 && (old_occurrence = find_subbytes(offset_ptr, str_len_remain, old, old_len, 1)) != NULL) { + while (num_replacements_done != (size_t)max_rep && str_len_remain > 0 && (old_occurrence = find_subbytes(offset_ptr, str_len_remain, old, old_len, 1)) != NULL) { if (old_len == 0) { old_occurrence += 1; } @@ -1682,7 +1686,7 @@ STATIC mp_obj_t str_count(size_t n_args, const mp_obj_t *args) { } #if MICROPY_PY_BUILTINS_STR_PARTITION -STATIC mp_obj_t str_partitioner(mp_obj_t self_in, mp_obj_t arg, mp_int_t direction) { +STATIC mp_obj_t str_partitioner(mp_obj_t self_in, mp_obj_t arg, int direction) { mp_check_self(MP_OBJ_IS_STR_OR_BYTES(self_in)); mp_obj_type_t *self_type = mp_obj_get_type(self_in); if (self_type != mp_obj_get_type(arg)) { @@ -1715,7 +1719,7 @@ STATIC mp_obj_t str_partitioner(mp_obj_t self_in, mp_obj_t arg, mp_int_t directi const byte *position_ptr = find_subbytes(str, str_len, sep, sep_len, direction); if (position_ptr != NULL) { - mp_uint_t position = position_ptr - str; + size_t position = position_ptr - str; result[0] = mp_obj_new_str_of_type(self_type, str, position); result[1] = arg; result[2] = mp_obj_new_str_of_type(self_type, str + position + sep_len, str_len - position - sep_len); @@ -1739,7 +1743,7 @@ STATIC mp_obj_t str_caseconv(unichar (*op)(unichar), mp_obj_t self_in) { vstr_t vstr; vstr_init_len(&vstr, self_len); byte *data = (byte*)vstr.buf; - for (mp_uint_t i = 0; i < self_len; i++) { + for (size_t i = 0; i < self_len; i++) { *data++ = op(*self_data++); } return mp_obj_new_str_from_vstr(mp_obj_get_type(self_in), &vstr); @@ -1761,7 +1765,7 @@ STATIC mp_obj_t str_uni_istype(bool (*f)(unichar), mp_obj_t self_in) { } if (f != unichar_isupper && f != unichar_islower) { - for (mp_uint_t i = 0; i < self_len; i++) { + for (size_t i = 0; i < self_len; i++) { if (!f(*self_data++)) { return mp_const_false; } @@ -1769,7 +1773,7 @@ STATIC mp_obj_t str_uni_istype(bool (*f)(unichar), mp_obj_t self_in) { } else { bool contains_alpha = false; - for (mp_uint_t i = 0; i < self_len; i++) { // only check alphanumeric characters + for (size_t i = 0; i < self_len; i++) { // only check alphanumeric characters if (unichar_isalpha(*self_data++)) { contains_alpha = true; if (!f(*(self_data - 1))) { // -1 because we already incremented above @@ -1936,7 +1940,7 @@ STATIC const mp_rom_map_elem_t str8_locals_dict_table[] = { STATIC MP_DEFINE_CONST_DICT(str8_locals_dict, str8_locals_dict_table); #if !MICROPY_PY_BUILTINS_STR_UNICODE -STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str); +STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str, mp_obj_iter_buf_t *iter_buf); const mp_obj_type_t mp_type_str = { { &mp_type_type }, @@ -2013,7 +2017,7 @@ mp_obj_t mp_obj_new_str_from_vstr(const mp_obj_type_t *type, vstr_t *vstr) { return MP_OBJ_FROM_PTR(o); } -mp_obj_t mp_obj_new_str(const char* data, mp_uint_t len, bool make_qstr_if_not_already) { +mp_obj_t mp_obj_new_str(const char* data, size_t len, bool make_qstr_if_not_already) { if (make_qstr_if_not_already) { // use existing, or make a new qstr return MP_OBJ_NEW_QSTR(qstr_from_strn(data, len)); @@ -2034,7 +2038,7 @@ mp_obj_t mp_obj_str_intern(mp_obj_t str) { return MP_OBJ_NEW_QSTR(qstr_from_strn((const char*)data, len)); } -mp_obj_t mp_obj_new_bytes(const byte* data, mp_uint_t len) { +mp_obj_t mp_obj_new_bytes(const byte* data, size_t len) { return mp_obj_new_str_of_type(&mp_type_bytes, data, len); } @@ -2061,9 +2065,10 @@ STATIC void bad_implicit_conversion(mp_obj_t self_in) { if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) { mp_raise_TypeError("can't convert to str implicitly"); } else { + const qstr src_name = mp_obj_get_type(self_in)->name; nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError, - "can't convert '%s' object to str implicitly", - mp_obj_get_type_str(self_in))); + "can't convert '%q' object to %q implicitly", + src_name, src_name == MP_QSTR_str ? MP_QSTR_bytes : MP_QSTR_str)); } } @@ -2092,7 +2097,7 @@ const char *mp_obj_str_get_str(mp_obj_t self_in) { } } -const char *mp_obj_str_get_data(mp_obj_t self_in, mp_uint_t *len) { +const char *mp_obj_str_get_data(mp_obj_t self_in, size_t *len) { if (MP_OBJ_IS_STR_OR_BYTES(self_in)) { GET_STR_DATA_LEN(self_in, s, l); *len = l; @@ -2120,7 +2125,7 @@ typedef struct _mp_obj_str8_it_t { mp_obj_base_t base; mp_fun_1_t iternext; mp_obj_t str; - mp_uint_t cur; + size_t cur; } mp_obj_str8_it_t; #if !MICROPY_PY_BUILTINS_STR_UNICODE @@ -2136,8 +2141,9 @@ STATIC mp_obj_t str_it_iternext(mp_obj_t self_in) { } } -STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str) { - mp_obj_str8_it_t *o = m_new_obj(mp_obj_str8_it_t); +STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str, mp_obj_iter_buf_t *iter_buf) { + assert(sizeof(mp_obj_str8_it_t) <= sizeof(mp_obj_iter_buf_t)); + mp_obj_str8_it_t *o = (mp_obj_str8_it_t*)iter_buf; o->base.type = &mp_type_polymorph_iter; o->iternext = str_it_iternext; o->str = str; @@ -2158,8 +2164,9 @@ STATIC mp_obj_t bytes_it_iternext(mp_obj_t self_in) { } } -mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str) { - mp_obj_str8_it_t *o = m_new_obj(mp_obj_str8_it_t); +mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str, mp_obj_iter_buf_t *iter_buf) { + assert(sizeof(mp_obj_str8_it_t) <= sizeof(mp_obj_iter_buf_t)); + mp_obj_str8_it_t *o = (mp_obj_str8_it_t*)iter_buf; o->base.type = &mp_type_polymorph_iter; o->iternext = bytes_it_iternext; o->str = str; |