diff options
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 127 |
1 files changed, 67 insertions, 60 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index b3287546dd9..5c2308a0121 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -56,6 +56,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. #include "pycore_pyhash.h" // _Py_HashSecret_t #include "pycore_pylifecycle.h" // _Py_SetFileSystemEncoding() #include "pycore_pystate.h" // _PyInterpreterState_GET() +#include "pycore_template.h" // _PyTemplate_Concat() #include "pycore_tuple.h" // _PyTuple_FromArray() #include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI #include "pycore_unicodeobject.h" // struct _Py_unicode_state @@ -166,11 +167,7 @@ static inline void PyUnicode_SET_UTF8_LENGTH(PyObject *op, Py_ssize_t length) #define _PyUnicode_HASH(op) \ (_PyASCIIObject_CAST(op)->hash) -static inline Py_hash_t PyUnicode_HASH(PyObject *op) -{ - assert(_PyUnicode_CHECK(op)); - return FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyASCIIObject_CAST(op)->hash); -} +#define PyUnicode_HASH PyUnstable_Unicode_GET_CACHED_HASH static inline void PyUnicode_SET_HASH(PyObject *op, Py_hash_t hash) { @@ -3729,7 +3726,7 @@ PyUnicode_Decode(const char *s, return NULL; } -PyObject * +PyAPI_FUNC(PyObject *) PyUnicode_AsDecodedObject(PyObject *unicode, const char *encoding, const char *errors) @@ -3739,12 +3736,6 @@ PyUnicode_AsDecodedObject(PyObject *unicode, return NULL; } - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "PyUnicode_AsDecodedObject() is deprecated " - "and will be removed in 3.15; " - "use PyCodec_Decode() to decode from str", 1) < 0) - return NULL; - if (encoding == NULL) encoding = PyUnicode_GetDefaultEncoding(); @@ -3752,7 +3743,7 @@ PyUnicode_AsDecodedObject(PyObject *unicode, return PyCodec_Decode(unicode, encoding, errors); } -PyObject * +PyAPI_FUNC(PyObject *) PyUnicode_AsDecodedUnicode(PyObject *unicode, const char *encoding, const char *errors) @@ -3764,12 +3755,6 @@ PyUnicode_AsDecodedUnicode(PyObject *unicode, goto onError; } - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "PyUnicode_AsDecodedUnicode() is deprecated " - "and will be removed in 3.15; " - "use PyCodec_Decode() to decode from str to str", 1) < 0) - return NULL; - if (encoding == NULL) encoding = PyUnicode_GetDefaultEncoding(); @@ -3792,7 +3777,7 @@ PyUnicode_AsDecodedUnicode(PyObject *unicode, return NULL; } -PyObject * +PyAPI_FUNC(PyObject *) PyUnicode_AsEncodedObject(PyObject *unicode, const char *encoding, const char *errors) @@ -3804,13 +3789,6 @@ PyUnicode_AsEncodedObject(PyObject *unicode, goto onError; } - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "PyUnicode_AsEncodedObject() is deprecated " - "and will be removed in 3.15; " - "use PyUnicode_AsEncodedString() to encode from str to bytes " - "or PyCodec_Encode() for generic encoding", 1) < 0) - return NULL; - if (encoding == NULL) encoding = PyUnicode_GetDefaultEncoding(); @@ -4016,7 +3994,7 @@ PyUnicode_AsEncodedString(PyObject *unicode, return NULL; } -PyObject * +PyAPI_FUNC(PyObject *) PyUnicode_AsEncodedUnicode(PyObject *unicode, const char *encoding, const char *errors) @@ -4028,12 +4006,6 @@ PyUnicode_AsEncodedUnicode(PyObject *unicode, goto onError; } - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "PyUnicode_AsEncodedUnicode() is deprecated " - "and will be removed in 3.15; " - "use PyCodec_Encode() to encode from str to str", 1) < 0) - return NULL; - if (encoding == NULL) encoding = PyUnicode_GetDefaultEncoding(); @@ -6620,13 +6592,15 @@ _PyUnicode_GetNameCAPI(void) /* --- Unicode Escape Codec ----------------------------------------------- */ PyObject * -_PyUnicode_DecodeUnicodeEscapeInternal(const char *s, +_PyUnicode_DecodeUnicodeEscapeInternal2(const char *s, Py_ssize_t size, const char *errors, Py_ssize_t *consumed, - const char **first_invalid_escape) + int *first_invalid_escape_char, + const char **first_invalid_escape_ptr) { const char *starts = s; + const char *initial_starts = starts; _PyUnicodeWriter writer; const char *end; PyObject *errorHandler = NULL; @@ -6634,7 +6608,8 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s, _PyUnicode_Name_CAPI *ucnhash_capi; // so we can remember if we've seen an invalid escape char or not - *first_invalid_escape = NULL; + *first_invalid_escape_char = -1; + *first_invalid_escape_ptr = NULL; if (size == 0) { if (consumed) { @@ -6722,9 +6697,12 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s, } } if (ch > 0377) { - if (*first_invalid_escape == NULL) { - *first_invalid_escape = s-3; /* Back up 3 chars, since we've - already incremented s. */ + if (*first_invalid_escape_char == -1) { + *first_invalid_escape_char = ch; + if (starts == initial_starts) { + /* Back up 3 chars, since we've already incremented s. */ + *first_invalid_escape_ptr = s - 3; + } } } WRITE_CHAR(ch); @@ -6819,9 +6797,12 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s, goto error; default: - if (*first_invalid_escape == NULL) { - *first_invalid_escape = s-1; /* Back up one char, since we've - already incremented s. */ + if (*first_invalid_escape_char == -1) { + *first_invalid_escape_char = c; + if (starts == initial_starts) { + /* Back up one char, since we've already incremented s. */ + *first_invalid_escape_ptr = s - 1; + } } WRITE_ASCII_CHAR('\\'); WRITE_CHAR(c); @@ -6866,19 +6847,20 @@ _PyUnicode_DecodeUnicodeEscapeStateful(const char *s, const char *errors, Py_ssize_t *consumed) { - const char *first_invalid_escape; - PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal(s, size, errors, + int first_invalid_escape_char; + const char *first_invalid_escape_ptr; + PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal2(s, size, errors, consumed, - &first_invalid_escape); + &first_invalid_escape_char, + &first_invalid_escape_ptr); if (result == NULL) return NULL; - if (first_invalid_escape != NULL) { - unsigned char c = *first_invalid_escape; - if ('4' <= c && c <= '7') { + if (first_invalid_escape_char != -1) { + if (first_invalid_escape_char > 0xff) { if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, - "\"\\%.3s\" is an invalid octal escape sequence. " + "\"\\%o\" is an invalid octal escape sequence. " "Such sequences will not work in the future. ", - first_invalid_escape) < 0) + first_invalid_escape_char) < 0) { Py_DECREF(result); return NULL; @@ -6888,7 +6870,7 @@ _PyUnicode_DecodeUnicodeEscapeStateful(const char *s, if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, "\"\\%c\" is an invalid escape sequence. " "Such sequences will not work in the future. ", - c) < 0) + first_invalid_escape_char) < 0) { Py_DECREF(result); return NULL; @@ -11628,10 +11610,16 @@ PyUnicode_Concat(PyObject *left, PyObject *right) return NULL; if (!PyUnicode_Check(right)) { - PyErr_Format(PyExc_TypeError, - "can only concatenate str (not \"%.200s\") to str", - Py_TYPE(right)->tp_name); - return NULL; + if (_PyTemplate_CheckExact(right)) { + // str + tstring is implemented in the tstring type + return _PyTemplate_Concat(left, right); + } + else { + PyErr_Format(PyExc_TypeError, + "can only concatenate str (not \"%.200s\") to str", + Py_TYPE(right)->tp_name); + return NULL; + } } /* Shortcuts */ @@ -13937,7 +13925,12 @@ _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str) int PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj) { - if (Py_TYPE(obj) == &PyLong_Type) { + PyTypeObject *type = Py_TYPE(obj); + if (type == &PyUnicode_Type) { + return _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, obj); + } + + if (type == &PyLong_Type) { return _PyLong_FormatWriter((_PyUnicodeWriter*)writer, obj, 10, 0); } @@ -14086,6 +14079,20 @@ _PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer, return 0; } + +int +PyUnicodeWriter_WriteASCII(PyUnicodeWriter *writer, + const char *str, + Py_ssize_t size) +{ + assert(writer != NULL); + _Py_AssertHoldsTstate(); + + _PyUnicodeWriter *priv_writer = (_PyUnicodeWriter*)writer; + return _PyUnicodeWriter_WriteASCIIString(priv_writer, str, size); +} + + int PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer, const char *str, @@ -15911,7 +15918,7 @@ immortalize_interned(PyObject *s) _Py_DecRefTotal(_PyThreadState_GET()); } #endif - FT_ATOMIC_STORE_UINT16_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_IMMORTAL); + FT_ATOMIC_STORE_UINT8_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_IMMORTAL); _Py_SetImmortal(s); } @@ -16029,7 +16036,7 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */, Py_DECREF(s); Py_DECREF(s); } - FT_ATOMIC_STORE_UINT16_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_MORTAL); + FT_ATOMIC_STORE_UINT8_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_MORTAL); /* INTERNED_MORTAL -> INTERNED_IMMORTAL (if needed) */ @@ -16165,7 +16172,7 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) Py_UNREACHABLE(); } if (!shared) { - FT_ATOMIC_STORE_UINT16_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_NOT_INTERNED); + FT_ATOMIC_STORE_UINT8_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_NOT_INTERNED); } } #ifdef INTERNED_STATS |