diff options
-rw-r--r-- | Objects/bytes_methods.c | 70 | ||||
-rw-r--r-- | Objects/stringlib/find_max_char.h | 10 |
2 files changed, 25 insertions, 55 deletions
diff --git a/Objects/bytes_methods.c b/Objects/bytes_methods.c index 981aa571643..55252406578 100644 --- a/Objects/bytes_methods.c +++ b/Objects/bytes_methods.c @@ -92,57 +92,6 @@ _Py_bytes_isalnum(const char *cptr, Py_ssize_t len) } -PyDoc_STRVAR_shared(_Py_isascii__doc__, -"B.isascii() -> bool\n\ -\n\ -Return True if B is empty or all characters in B are ASCII,\n\ -False otherwise."); - -// Optimization is copied from ascii_decode in unicodeobject.c -/* Mask to quickly check whether a C 'size_t' contains a - non-ASCII, UTF8-encoded char. */ -#if (SIZEOF_SIZE_T == 8) -# define ASCII_CHAR_MASK 0x8080808080808080ULL -#elif (SIZEOF_SIZE_T == 4) -# define ASCII_CHAR_MASK 0x80808080U -#else -# error C 'size_t' size should be either 4 or 8! -#endif - -PyObject* -_Py_bytes_isascii(const char *cptr, Py_ssize_t len) -{ - const char *p = cptr; - const char *end = p + len; - - while (p < end) { - /* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h - for an explanation. */ - if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) { - /* Help allocation */ - const char *_p = p; - while (_p + SIZEOF_SIZE_T <= end) { - size_t value = *(const size_t *) _p; - if (value & ASCII_CHAR_MASK) { - Py_RETURN_FALSE; - } - _p += SIZEOF_SIZE_T; - } - p = _p; - if (_p == end) - break; - } - if ((unsigned char)*p & 0x80) { - Py_RETURN_FALSE; - } - p++; - } - Py_RETURN_TRUE; -} - -#undef ASCII_CHAR_MASK - - PyDoc_STRVAR_shared(_Py_isdigit__doc__, "B.isdigit() -> bool\n\ \n\ @@ -438,6 +387,7 @@ _Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to) #include "stringlib/fastsearch.h" #include "stringlib/count.h" #include "stringlib/find.h" +#include "stringlib/find_max_char.h" /* Wraps stringlib_parse_args_finds() and additionally checks the first @@ -765,3 +715,21 @@ _Py_bytes_endswith(const char *str, Py_ssize_t len, PyObject *subobj, { return _Py_bytes_tailmatch(str, len, "endswith", subobj, start, end, +1); } + +PyDoc_STRVAR_shared(_Py_isascii__doc__, +"B.isascii() -> bool\n\ +\n\ +Return True if B is empty or all characters in B are ASCII,\n\ +False otherwise."); + +PyObject* +_Py_bytes_isascii(const char *cptr, Py_ssize_t len) +{ + const char *p = cptr; + const char *end = p + len; + Py_ssize_t max_char = stringlib_find_max_char(cptr, end); + if (max_char > 127) { + Py_RETURN_FALSE; + } + Py_RETURN_TRUE; +} diff --git a/Objects/stringlib/find_max_char.h b/Objects/stringlib/find_max_char.h index b9ffdfc2e35..7ab3fc88b33 100644 --- a/Objects/stringlib/find_max_char.h +++ b/Objects/stringlib/find_max_char.h @@ -1,6 +1,7 @@ /* Finding the optimal width of unicode characters in a buffer */ -#if !STRINGLIB_IS_UNICODE +/* find_max_char for one-byte will work for bytes objects as well. */ +#if !STRINGLIB_IS_UNICODE && STRINGLIB_SIZEOF_CHAR > 1 # error "find_max_char.h is specific to Unicode" #endif @@ -20,19 +21,20 @@ Py_LOCAL_INLINE(Py_UCS4) STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end) { const unsigned char *p = (const unsigned char *) begin; + const unsigned char *_end = (const unsigned char *)end; - while (p < end) { + while (p < _end) { if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) { /* Help register allocation */ const unsigned char *_p = p; - while (_p + SIZEOF_SIZE_T <= end) { + while (_p + SIZEOF_SIZE_T <= _end) { size_t value = *(const size_t *) _p; if (value & UCS1_ASCII_CHAR_MASK) return 255; _p += SIZEOF_SIZE_T; } p = _p; - if (p == end) + if (p == _end) break; } if (*p++ & 0x80) |