aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/Objects/unicodeobject.c
diff options
context:
space:
mode:
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r--Objects/unicodeobject.c127
1 files changed, 67 insertions, 60 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index b3287546dd9..5c2308a0121 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -56,6 +56,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#include "pycore_pyhash.h" // _Py_HashSecret_t
#include "pycore_pylifecycle.h" // _Py_SetFileSystemEncoding()
#include "pycore_pystate.h" // _PyInterpreterState_GET()
+#include "pycore_template.h" // _PyTemplate_Concat()
#include "pycore_tuple.h" // _PyTuple_FromArray()
#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI
#include "pycore_unicodeobject.h" // struct _Py_unicode_state
@@ -166,11 +167,7 @@ static inline void PyUnicode_SET_UTF8_LENGTH(PyObject *op, Py_ssize_t length)
#define _PyUnicode_HASH(op) \
(_PyASCIIObject_CAST(op)->hash)
-static inline Py_hash_t PyUnicode_HASH(PyObject *op)
-{
- assert(_PyUnicode_CHECK(op));
- return FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyASCIIObject_CAST(op)->hash);
-}
+#define PyUnicode_HASH PyUnstable_Unicode_GET_CACHED_HASH
static inline void PyUnicode_SET_HASH(PyObject *op, Py_hash_t hash)
{
@@ -3729,7 +3726,7 @@ PyUnicode_Decode(const char *s,
return NULL;
}
-PyObject *
+PyAPI_FUNC(PyObject *)
PyUnicode_AsDecodedObject(PyObject *unicode,
const char *encoding,
const char *errors)
@@ -3739,12 +3736,6 @@ PyUnicode_AsDecodedObject(PyObject *unicode,
return NULL;
}
- if (PyErr_WarnEx(PyExc_DeprecationWarning,
- "PyUnicode_AsDecodedObject() is deprecated "
- "and will be removed in 3.15; "
- "use PyCodec_Decode() to decode from str", 1) < 0)
- return NULL;
-
if (encoding == NULL)
encoding = PyUnicode_GetDefaultEncoding();
@@ -3752,7 +3743,7 @@ PyUnicode_AsDecodedObject(PyObject *unicode,
return PyCodec_Decode(unicode, encoding, errors);
}
-PyObject *
+PyAPI_FUNC(PyObject *)
PyUnicode_AsDecodedUnicode(PyObject *unicode,
const char *encoding,
const char *errors)
@@ -3764,12 +3755,6 @@ PyUnicode_AsDecodedUnicode(PyObject *unicode,
goto onError;
}
- if (PyErr_WarnEx(PyExc_DeprecationWarning,
- "PyUnicode_AsDecodedUnicode() is deprecated "
- "and will be removed in 3.15; "
- "use PyCodec_Decode() to decode from str to str", 1) < 0)
- return NULL;
-
if (encoding == NULL)
encoding = PyUnicode_GetDefaultEncoding();
@@ -3792,7 +3777,7 @@ PyUnicode_AsDecodedUnicode(PyObject *unicode,
return NULL;
}
-PyObject *
+PyAPI_FUNC(PyObject *)
PyUnicode_AsEncodedObject(PyObject *unicode,
const char *encoding,
const char *errors)
@@ -3804,13 +3789,6 @@ PyUnicode_AsEncodedObject(PyObject *unicode,
goto onError;
}
- if (PyErr_WarnEx(PyExc_DeprecationWarning,
- "PyUnicode_AsEncodedObject() is deprecated "
- "and will be removed in 3.15; "
- "use PyUnicode_AsEncodedString() to encode from str to bytes "
- "or PyCodec_Encode() for generic encoding", 1) < 0)
- return NULL;
-
if (encoding == NULL)
encoding = PyUnicode_GetDefaultEncoding();
@@ -4016,7 +3994,7 @@ PyUnicode_AsEncodedString(PyObject *unicode,
return NULL;
}
-PyObject *
+PyAPI_FUNC(PyObject *)
PyUnicode_AsEncodedUnicode(PyObject *unicode,
const char *encoding,
const char *errors)
@@ -4028,12 +4006,6 @@ PyUnicode_AsEncodedUnicode(PyObject *unicode,
goto onError;
}
- if (PyErr_WarnEx(PyExc_DeprecationWarning,
- "PyUnicode_AsEncodedUnicode() is deprecated "
- "and will be removed in 3.15; "
- "use PyCodec_Encode() to encode from str to str", 1) < 0)
- return NULL;
-
if (encoding == NULL)
encoding = PyUnicode_GetDefaultEncoding();
@@ -6620,13 +6592,15 @@ _PyUnicode_GetNameCAPI(void)
/* --- Unicode Escape Codec ----------------------------------------------- */
PyObject *
-_PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
+_PyUnicode_DecodeUnicodeEscapeInternal2(const char *s,
Py_ssize_t size,
const char *errors,
Py_ssize_t *consumed,
- const char **first_invalid_escape)
+ int *first_invalid_escape_char,
+ const char **first_invalid_escape_ptr)
{
const char *starts = s;
+ const char *initial_starts = starts;
_PyUnicodeWriter writer;
const char *end;
PyObject *errorHandler = NULL;
@@ -6634,7 +6608,8 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
_PyUnicode_Name_CAPI *ucnhash_capi;
// so we can remember if we've seen an invalid escape char or not
- *first_invalid_escape = NULL;
+ *first_invalid_escape_char = -1;
+ *first_invalid_escape_ptr = NULL;
if (size == 0) {
if (consumed) {
@@ -6722,9 +6697,12 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
}
}
if (ch > 0377) {
- if (*first_invalid_escape == NULL) {
- *first_invalid_escape = s-3; /* Back up 3 chars, since we've
- already incremented s. */
+ if (*first_invalid_escape_char == -1) {
+ *first_invalid_escape_char = ch;
+ if (starts == initial_starts) {
+ /* Back up 3 chars, since we've already incremented s. */
+ *first_invalid_escape_ptr = s - 3;
+ }
}
}
WRITE_CHAR(ch);
@@ -6819,9 +6797,12 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
goto error;
default:
- if (*first_invalid_escape == NULL) {
- *first_invalid_escape = s-1; /* Back up one char, since we've
- already incremented s. */
+ if (*first_invalid_escape_char == -1) {
+ *first_invalid_escape_char = c;
+ if (starts == initial_starts) {
+ /* Back up one char, since we've already incremented s. */
+ *first_invalid_escape_ptr = s - 1;
+ }
}
WRITE_ASCII_CHAR('\\');
WRITE_CHAR(c);
@@ -6866,19 +6847,20 @@ _PyUnicode_DecodeUnicodeEscapeStateful(const char *s,
const char *errors,
Py_ssize_t *consumed)
{
- const char *first_invalid_escape;
- PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal(s, size, errors,
+ int first_invalid_escape_char;
+ const char *first_invalid_escape_ptr;
+ PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal2(s, size, errors,
consumed,
- &first_invalid_escape);
+ &first_invalid_escape_char,
+ &first_invalid_escape_ptr);
if (result == NULL)
return NULL;
- if (first_invalid_escape != NULL) {
- unsigned char c = *first_invalid_escape;
- if ('4' <= c && c <= '7') {
+ if (first_invalid_escape_char != -1) {
+ if (first_invalid_escape_char > 0xff) {
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
- "\"\\%.3s\" is an invalid octal escape sequence. "
+ "\"\\%o\" is an invalid octal escape sequence. "
"Such sequences will not work in the future. ",
- first_invalid_escape) < 0)
+ first_invalid_escape_char) < 0)
{
Py_DECREF(result);
return NULL;
@@ -6888,7 +6870,7 @@ _PyUnicode_DecodeUnicodeEscapeStateful(const char *s,
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
"\"\\%c\" is an invalid escape sequence. "
"Such sequences will not work in the future. ",
- c) < 0)
+ first_invalid_escape_char) < 0)
{
Py_DECREF(result);
return NULL;
@@ -11628,10 +11610,16 @@ PyUnicode_Concat(PyObject *left, PyObject *right)
return NULL;
if (!PyUnicode_Check(right)) {
- PyErr_Format(PyExc_TypeError,
- "can only concatenate str (not \"%.200s\") to str",
- Py_TYPE(right)->tp_name);
- return NULL;
+ if (_PyTemplate_CheckExact(right)) {
+ // str + tstring is implemented in the tstring type
+ return _PyTemplate_Concat(left, right);
+ }
+ else {
+ PyErr_Format(PyExc_TypeError,
+ "can only concatenate str (not \"%.200s\") to str",
+ Py_TYPE(right)->tp_name);
+ return NULL;
+ }
}
/* Shortcuts */
@@ -13937,7 +13925,12 @@ _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str)
int
PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj)
{
- if (Py_TYPE(obj) == &PyLong_Type) {
+ PyTypeObject *type = Py_TYPE(obj);
+ if (type == &PyUnicode_Type) {
+ return _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, obj);
+ }
+
+ if (type == &PyLong_Type) {
return _PyLong_FormatWriter((_PyUnicodeWriter*)writer, obj, 10, 0);
}
@@ -14086,6 +14079,20 @@ _PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
return 0;
}
+
+int
+PyUnicodeWriter_WriteASCII(PyUnicodeWriter *writer,
+ const char *str,
+ Py_ssize_t size)
+{
+ assert(writer != NULL);
+ _Py_AssertHoldsTstate();
+
+ _PyUnicodeWriter *priv_writer = (_PyUnicodeWriter*)writer;
+ return _PyUnicodeWriter_WriteASCIIString(priv_writer, str, size);
+}
+
+
int
PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer,
const char *str,
@@ -15911,7 +15918,7 @@ immortalize_interned(PyObject *s)
_Py_DecRefTotal(_PyThreadState_GET());
}
#endif
- FT_ATOMIC_STORE_UINT16_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_IMMORTAL);
+ FT_ATOMIC_STORE_UINT8_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_IMMORTAL);
_Py_SetImmortal(s);
}
@@ -16029,7 +16036,7 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
Py_DECREF(s);
Py_DECREF(s);
}
- FT_ATOMIC_STORE_UINT16_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_MORTAL);
+ FT_ATOMIC_STORE_UINT8_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_MORTAL);
/* INTERNED_MORTAL -> INTERNED_IMMORTAL (if needed) */
@@ -16165,7 +16172,7 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
Py_UNREACHABLE();
}
if (!shared) {
- FT_ATOMIC_STORE_UINT16_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_NOT_INTERNED);
+ FT_ATOMIC_STORE_UINT8_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_NOT_INTERNED);
}
}
#ifdef INTERNED_STATS