diff options
Diffstat (limited to 'py/objstr.c')
-rw-r--r-- | py/objstr.c | 93 |
1 files changed, 91 insertions, 2 deletions
diff --git a/py/objstr.c b/py/objstr.c index 83fd002d1e..27f6d9cd6f 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -667,6 +667,7 @@ STATIC mp_obj_t str_uni_strip(int type, uint n_args, const mp_obj_t *args) { for (machine_uint_t len = orig_str_len; len > 0; len--) { if (find_subbytes(chars_to_del, chars_to_del_len, &orig_str[i], 1, 1) == NULL) { if (!first_good_char_pos_set) { + first_good_char_pos_set = true; first_good_char_pos = i; if (type == LSTRIP) { last_good_char_pos = orig_str_len - 1; @@ -676,14 +677,13 @@ STATIC mp_obj_t str_uni_strip(int type, uint n_args, const mp_obj_t *args) { last_good_char_pos = i; break; } - first_good_char_pos_set = true; } last_good_char_pos = i; } i += delta; } - if (first_good_char_pos == 0 && last_good_char_pos == 0) { + if (!first_good_char_pos_set) { // string is all whitespace, return '' return MP_OBJ_NEW_QSTR(MP_QSTR_); } @@ -691,6 +691,12 @@ STATIC mp_obj_t str_uni_strip(int type, uint n_args, const mp_obj_t *args) { assert(last_good_char_pos >= first_good_char_pos); //+1 to accomodate the last character machine_uint_t stripped_len = last_good_char_pos - first_good_char_pos + 1; + if (stripped_len == orig_str_len) { + // If nothing was stripped, don't bother to dup original string + // TODO: watch out for this case when we'll get to bytearray.strip() + assert(first_good_char_pos == 0); + return args[0]; + } return mp_obj_new_str_of_type(self_type, orig_str + first_good_char_pos, stripped_len); } @@ -1496,6 +1502,79 @@ STATIC mp_obj_t str_upper(mp_obj_t self_in) { return str_caseconv(CASE_UPPER, self_in); } +enum { IS_SPACE, IS_ALPHA, IS_DIGIT, IS_UPPER, IS_LOWER }; + +STATIC mp_obj_t str_uni_istype(int type, mp_obj_t self_in) { + GET_STR_DATA_LEN(self_in, self_data, self_len); + + if (self_len == 0) { + return mp_const_false; // default to False for empty str + } + + typedef bool (*check_function)(unichar); + check_function f; + + if (type != IS_UPPER && type != IS_LOWER) { + switch (type) { + case IS_SPACE: f = &unichar_isspace; break; + case IS_ALPHA: f = &unichar_isalpha; break; + case IS_DIGIT: f = &unichar_isdigit; break; + default: + nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "unknown type provided for str_uni_istype")); + } + + for (int i = 0; i < self_len; i++) { + if (!f(*self_data++)) { + return mp_const_false; + } + } + } else { + switch (type) { + case IS_UPPER: f = &unichar_isupper; break; + case IS_LOWER: f = &unichar_islower; break; + default: + nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "unknown type provided for str_uni_istype")); + } + + bool contains_alpha = false; + + for (int i = 0; i < self_len; i++) { // only check alphanumeric characters + if (unichar_isalpha(*self_data++)) { + contains_alpha = true; + if (!f(*(self_data-1))) { + return mp_const_false; // we already incremented + } + } + } + + if (!contains_alpha) { + return mp_const_false; + } + } + + return mp_const_true; +} + +STATIC mp_obj_t str_isspace(mp_obj_t self_in) { + return str_uni_istype(IS_SPACE, self_in); +} + +STATIC mp_obj_t str_isalpha(mp_obj_t self_in) { + return str_uni_istype(IS_ALPHA, self_in); +} + +STATIC mp_obj_t str_isdigit(mp_obj_t self_in) { + return str_uni_istype(IS_DIGIT, self_in); +} + +STATIC mp_obj_t str_isupper(mp_obj_t self_in) { + return str_uni_istype(IS_UPPER, self_in); +} + +STATIC mp_obj_t str_islower(mp_obj_t self_in) { + return str_uni_istype(IS_LOWER, self_in); +} + #if MICROPY_CPYTHON_COMPAT // These methods are superfluous in the presense of str() and bytes() // constructors. @@ -1563,6 +1642,11 @@ STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_partition_obj, str_partition); STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_rpartition_obj, str_rpartition); STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_lower_obj, str_lower); STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_upper_obj, str_upper); +STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_isspace_obj, str_isspace); +STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_isalpha_obj, str_isalpha); +STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_isdigit_obj, str_isdigit); +STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_isupper_obj, str_isupper); +STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_islower_obj, str_islower); STATIC const mp_map_elem_t str_locals_dict_table[] = { #if MICROPY_CPYTHON_COMPAT @@ -1588,6 +1672,11 @@ STATIC const mp_map_elem_t str_locals_dict_table[] = { { MP_OBJ_NEW_QSTR(MP_QSTR_rpartition), (mp_obj_t)&str_rpartition_obj }, { MP_OBJ_NEW_QSTR(MP_QSTR_lower), (mp_obj_t)&str_lower_obj }, { MP_OBJ_NEW_QSTR(MP_QSTR_upper), (mp_obj_t)&str_upper_obj }, + { MP_OBJ_NEW_QSTR(MP_QSTR_isspace), (mp_obj_t)&str_isspace_obj }, + { MP_OBJ_NEW_QSTR(MP_QSTR_isalpha), (mp_obj_t)&str_isalpha_obj }, + { MP_OBJ_NEW_QSTR(MP_QSTR_isdigit), (mp_obj_t)&str_isdigit_obj }, + { MP_OBJ_NEW_QSTR(MP_QSTR_isupper), (mp_obj_t)&str_isupper_obj }, + { MP_OBJ_NEW_QSTR(MP_QSTR_islower), (mp_obj_t)&str_islower_obj }, }; STATIC MP_DEFINE_CONST_DICT(str_locals_dict, str_locals_dict_table); |