diff options
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 106 |
1 files changed, 53 insertions, 53 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index eb3e1c48fd4..5c2308a0121 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -167,11 +167,7 @@ static inline void PyUnicode_SET_UTF8_LENGTH(PyObject *op, Py_ssize_t length) #define _PyUnicode_HASH(op) \ (_PyASCIIObject_CAST(op)->hash) -static inline Py_hash_t PyUnicode_HASH(PyObject *op) -{ - assert(_PyUnicode_CHECK(op)); - return FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyASCIIObject_CAST(op)->hash); -} +#define PyUnicode_HASH PyUnstable_Unicode_GET_CACHED_HASH static inline void PyUnicode_SET_HASH(PyObject *op, Py_hash_t hash) { @@ -3730,7 +3726,7 @@ PyUnicode_Decode(const char *s, return NULL; } -PyObject * +PyAPI_FUNC(PyObject *) PyUnicode_AsDecodedObject(PyObject *unicode, const char *encoding, const char *errors) @@ -3740,12 +3736,6 @@ PyUnicode_AsDecodedObject(PyObject *unicode, return NULL; } - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "PyUnicode_AsDecodedObject() is deprecated " - "and will be removed in 3.15; " - "use PyCodec_Decode() to decode from str", 1) < 0) - return NULL; - if (encoding == NULL) encoding = PyUnicode_GetDefaultEncoding(); @@ -3753,7 +3743,7 @@ PyUnicode_AsDecodedObject(PyObject *unicode, return PyCodec_Decode(unicode, encoding, errors); } -PyObject * +PyAPI_FUNC(PyObject *) PyUnicode_AsDecodedUnicode(PyObject *unicode, const char *encoding, const char *errors) @@ -3765,12 +3755,6 @@ PyUnicode_AsDecodedUnicode(PyObject *unicode, goto onError; } - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "PyUnicode_AsDecodedUnicode() is deprecated " - "and will be removed in 3.15; " - "use PyCodec_Decode() to decode from str to str", 1) < 0) - return NULL; - if (encoding == NULL) encoding = PyUnicode_GetDefaultEncoding(); @@ -3793,7 +3777,7 @@ PyUnicode_AsDecodedUnicode(PyObject *unicode, return NULL; } -PyObject * +PyAPI_FUNC(PyObject *) PyUnicode_AsEncodedObject(PyObject *unicode, const char *encoding, const char *errors) @@ -3805,13 +3789,6 @@ PyUnicode_AsEncodedObject(PyObject *unicode, goto onError; } - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "PyUnicode_AsEncodedObject() is deprecated " - "and will be removed in 3.15; " - "use PyUnicode_AsEncodedString() to encode from str to bytes " - "or PyCodec_Encode() for generic encoding", 1) < 0) - return NULL; - if (encoding == NULL) encoding = PyUnicode_GetDefaultEncoding(); @@ -4017,7 +3994,7 @@ PyUnicode_AsEncodedString(PyObject *unicode, return NULL; } -PyObject * +PyAPI_FUNC(PyObject *) PyUnicode_AsEncodedUnicode(PyObject *unicode, const char *encoding, const char *errors) @@ -4029,12 +4006,6 @@ PyUnicode_AsEncodedUnicode(PyObject *unicode, goto onError; } - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "PyUnicode_AsEncodedUnicode() is deprecated " - "and will be removed in 3.15; " - "use PyCodec_Encode() to encode from str to str", 1) < 0) - return NULL; - if (encoding == NULL) encoding = PyUnicode_GetDefaultEncoding(); @@ -6621,13 +6592,15 @@ _PyUnicode_GetNameCAPI(void) /* --- Unicode Escape Codec ----------------------------------------------- */ PyObject * -_PyUnicode_DecodeUnicodeEscapeInternal(const char *s, +_PyUnicode_DecodeUnicodeEscapeInternal2(const char *s, Py_ssize_t size, const char *errors, Py_ssize_t *consumed, - const char **first_invalid_escape) + int *first_invalid_escape_char, + const char **first_invalid_escape_ptr) { const char *starts = s; + const char *initial_starts = starts; _PyUnicodeWriter writer; const char *end; PyObject *errorHandler = NULL; @@ -6635,7 +6608,8 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s, _PyUnicode_Name_CAPI *ucnhash_capi; // so we can remember if we've seen an invalid escape char or not - *first_invalid_escape = NULL; + *first_invalid_escape_char = -1; + *first_invalid_escape_ptr = NULL; if (size == 0) { if (consumed) { @@ -6723,9 +6697,12 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s, } } if (ch > 0377) { - if (*first_invalid_escape == NULL) { - *first_invalid_escape = s-3; /* Back up 3 chars, since we've - already incremented s. */ + if (*first_invalid_escape_char == -1) { + *first_invalid_escape_char = ch; + if (starts == initial_starts) { + /* Back up 3 chars, since we've already incremented s. */ + *first_invalid_escape_ptr = s - 3; + } } } WRITE_CHAR(ch); @@ -6820,9 +6797,12 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s, goto error; default: - if (*first_invalid_escape == NULL) { - *first_invalid_escape = s-1; /* Back up one char, since we've - already incremented s. */ + if (*first_invalid_escape_char == -1) { + *first_invalid_escape_char = c; + if (starts == initial_starts) { + /* Back up one char, since we've already incremented s. */ + *first_invalid_escape_ptr = s - 1; + } } WRITE_ASCII_CHAR('\\'); WRITE_CHAR(c); @@ -6867,19 +6847,20 @@ _PyUnicode_DecodeUnicodeEscapeStateful(const char *s, const char *errors, Py_ssize_t *consumed) { - const char *first_invalid_escape; - PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal(s, size, errors, + int first_invalid_escape_char; + const char *first_invalid_escape_ptr; + PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal2(s, size, errors, consumed, - &first_invalid_escape); + &first_invalid_escape_char, + &first_invalid_escape_ptr); if (result == NULL) return NULL; - if (first_invalid_escape != NULL) { - unsigned char c = *first_invalid_escape; - if ('4' <= c && c <= '7') { + if (first_invalid_escape_char != -1) { + if (first_invalid_escape_char > 0xff) { if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, - "\"\\%.3s\" is an invalid octal escape sequence. " + "\"\\%o\" is an invalid octal escape sequence. " "Such sequences will not work in the future. ", - first_invalid_escape) < 0) + first_invalid_escape_char) < 0) { Py_DECREF(result); return NULL; @@ -6889,7 +6870,7 @@ _PyUnicode_DecodeUnicodeEscapeStateful(const char *s, if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, "\"\\%c\" is an invalid escape sequence. " "Such sequences will not work in the future. ", - c) < 0) + first_invalid_escape_char) < 0) { Py_DECREF(result); return NULL; @@ -13944,7 +13925,12 @@ _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str) int PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj) { - if (Py_TYPE(obj) == &PyLong_Type) { + PyTypeObject *type = Py_TYPE(obj); + if (type == &PyUnicode_Type) { + return _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, obj); + } + + if (type == &PyLong_Type) { return _PyLong_FormatWriter((_PyUnicodeWriter*)writer, obj, 10, 0); } @@ -14093,6 +14079,20 @@ _PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer, return 0; } + +int +PyUnicodeWriter_WriteASCII(PyUnicodeWriter *writer, + const char *str, + Py_ssize_t size) +{ + assert(writer != NULL); + _Py_AssertHoldsTstate(); + + _PyUnicodeWriter *priv_writer = (_PyUnicodeWriter*)writer; + return _PyUnicodeWriter_WriteASCIIString(priv_writer, str, size); +} + + int PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer, const char *str, |