summaryrefslogtreecommitdiffstatshomepage
path: root/py/objstr.c
diff options
context:
space:
mode:
Diffstat (limited to 'py/objstr.c')
-rw-r--r--py/objstr.c93
1 files changed, 91 insertions, 2 deletions
diff --git a/py/objstr.c b/py/objstr.c
index 83fd002d1e..27f6d9cd6f 100644
--- a/py/objstr.c
+++ b/py/objstr.c
@@ -667,6 +667,7 @@ STATIC mp_obj_t str_uni_strip(int type, uint n_args, const mp_obj_t *args) {
for (machine_uint_t len = orig_str_len; len > 0; len--) {
if (find_subbytes(chars_to_del, chars_to_del_len, &orig_str[i], 1, 1) == NULL) {
if (!first_good_char_pos_set) {
+ first_good_char_pos_set = true;
first_good_char_pos = i;
if (type == LSTRIP) {
last_good_char_pos = orig_str_len - 1;
@@ -676,14 +677,13 @@ STATIC mp_obj_t str_uni_strip(int type, uint n_args, const mp_obj_t *args) {
last_good_char_pos = i;
break;
}
- first_good_char_pos_set = true;
}
last_good_char_pos = i;
}
i += delta;
}
- if (first_good_char_pos == 0 && last_good_char_pos == 0) {
+ if (!first_good_char_pos_set) {
// string is all whitespace, return ''
return MP_OBJ_NEW_QSTR(MP_QSTR_);
}
@@ -691,6 +691,12 @@ STATIC mp_obj_t str_uni_strip(int type, uint n_args, const mp_obj_t *args) {
assert(last_good_char_pos >= first_good_char_pos);
//+1 to accommodate the last character
machine_uint_t stripped_len = last_good_char_pos - first_good_char_pos + 1;
+ if (stripped_len == orig_str_len) {
+ // If nothing was stripped, don't bother to dup original string
+ // TODO: watch out for this case when we'll get to bytearray.strip()
+ assert(first_good_char_pos == 0);
+ return args[0];
+ }
return mp_obj_new_str_of_type(self_type, orig_str + first_good_char_pos, stripped_len);
}
@@ -1496,6 +1502,79 @@ STATIC mp_obj_t str_upper(mp_obj_t self_in) {
return str_caseconv(CASE_UPPER, self_in);
}
+// Selectors for the `type` argument of str_uni_istype().
+enum { IS_SPACE, IS_ALPHA, IS_DIGIT, IS_UPPER, IS_LOWER };
+
+// Shared implementation behind str_isspace/str_isalpha/str_isdigit/
+// str_isupper/str_islower.
+//
+// type    - one of IS_SPACE, IS_ALPHA, IS_DIGIT, IS_UPPER, IS_LOWER
+// self_in - the string object to examine
+//
+// Returns mp_const_false for an empty string.  For IS_SPACE, IS_ALPHA and
+// IS_DIGIT, returns mp_const_true only if every element of the string data
+// satisfies the matching unichar_is*() predicate.  For IS_UPPER and IS_LOWER,
+// only elements for which unichar_isalpha() holds are tested, and at least
+// one such element must be present for the result to be mp_const_true.
+// Raises TypeError if `type` is not one of the selectors listed above.
+//
+// NOTE(review): elements are handed to the unichar predicates one at a time
+// as stored in the string data; confirm this is the intended unit for
+// non-ASCII text.
+STATIC mp_obj_t str_uni_istype(int type, mp_obj_t self_in) {
+    GET_STR_DATA_LEN(self_in, self_data, self_len);
+
+    if (self_len == 0) {
+        return mp_const_false; // default to False for empty str
+    }
+
+    typedef bool (*check_function)(unichar);
+    check_function f;
+    // Set for IS_UPPER/IS_LOWER, where only alphabetic elements are tested.
+    bool cased_check = false;
+
+    // Single dispatch point: one reachable default instead of two switches.
+    switch (type) {
+        case IS_SPACE: f = &unichar_isspace; break;
+        case IS_ALPHA: f = &unichar_isalpha; break;
+        case IS_DIGIT: f = &unichar_isdigit; break;
+        case IS_UPPER: f = &unichar_isupper; cased_check = true; break;
+        case IS_LOWER: f = &unichar_islower; cased_check = true; break;
+        default:
+            nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "unknown type provided for str_uni_istype"));
+    }
+
+    if (!cased_check) {
+        // Every element must satisfy the predicate.
+        for (machine_uint_t i = 0; i < self_len; i++) {
+            if (!f(self_data[i])) {
+                return mp_const_false;
+            }
+        }
+        return mp_const_true;
+    }
+
+    bool contains_alpha = false;
+
+    for (machine_uint_t i = 0; i < self_len; i++) {
+        // Non-alphabetic elements are ignored for the upper/lower checks.
+        if (unichar_isalpha(self_data[i])) {
+            contains_alpha = true;
+            if (!f(self_data[i])) {
+                return mp_const_false;
+            }
+        }
+    }
+
+    // Without any alphabetic element the string is neither upper nor lower.
+    if (!contains_alpha) {
+        return mp_const_false;
+    }
+
+    return mp_const_true;
+}
+
+// Method body for str.isspace: delegates to str_uni_istype() with IS_SPACE.
+STATIC mp_obj_t str_isspace(mp_obj_t self_in) {
+ return str_uni_istype(IS_SPACE, self_in);
+}
+
+// Method body for str.isalpha: delegates to str_uni_istype() with IS_ALPHA.
+STATIC mp_obj_t str_isalpha(mp_obj_t self_in) {
+ return str_uni_istype(IS_ALPHA, self_in);
+}
+
+// Method body for str.isdigit: delegates to str_uni_istype() with IS_DIGIT.
+STATIC mp_obj_t str_isdigit(mp_obj_t self_in) {
+ return str_uni_istype(IS_DIGIT, self_in);
+}
+
+// Method body for str.isupper: delegates to str_uni_istype() with IS_UPPER.
+STATIC mp_obj_t str_isupper(mp_obj_t self_in) {
+ return str_uni_istype(IS_UPPER, self_in);
+}
+
+// Method body for str.islower: delegates to str_uni_istype() with IS_LOWER.
+STATIC mp_obj_t str_islower(mp_obj_t self_in) {
+ return str_uni_istype(IS_LOWER, self_in);
+}
+
#if MICROPY_CPYTHON_COMPAT
// These methods are superfluous in the presence of str() and bytes()
// constructors.
@@ -1563,6 +1642,11 @@ STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_partition_obj, str_partition);
STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_rpartition_obj, str_rpartition);
STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_lower_obj, str_lower);
STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_upper_obj, str_upper);
+STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_isspace_obj, str_isspace);
+STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_isalpha_obj, str_isalpha);
+STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_isdigit_obj, str_isdigit);
+STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_isupper_obj, str_isupper);
+STATIC MP_DEFINE_CONST_FUN_OBJ_1(str_islower_obj, str_islower);
STATIC const mp_map_elem_t str_locals_dict_table[] = {
#if MICROPY_CPYTHON_COMPAT
@@ -1588,6 +1672,11 @@ STATIC const mp_map_elem_t str_locals_dict_table[] = {
{ MP_OBJ_NEW_QSTR(MP_QSTR_rpartition), (mp_obj_t)&str_rpartition_obj },
{ MP_OBJ_NEW_QSTR(MP_QSTR_lower), (mp_obj_t)&str_lower_obj },
{ MP_OBJ_NEW_QSTR(MP_QSTR_upper), (mp_obj_t)&str_upper_obj },
+ { MP_OBJ_NEW_QSTR(MP_QSTR_isspace), (mp_obj_t)&str_isspace_obj },
+ { MP_OBJ_NEW_QSTR(MP_QSTR_isalpha), (mp_obj_t)&str_isalpha_obj },
+ { MP_OBJ_NEW_QSTR(MP_QSTR_isdigit), (mp_obj_t)&str_isdigit_obj },
+ { MP_OBJ_NEW_QSTR(MP_QSTR_isupper), (mp_obj_t)&str_isupper_obj },
+ { MP_OBJ_NEW_QSTR(MP_QSTR_islower), (mp_obj_t)&str_islower_obj },
};
STATIC MP_DEFINE_CONST_DICT(str_locals_dict, str_locals_dict_table);