about | summary | refs | log | tree | commit | diff | stats | homepage
path: root/Objects/unicodeobject.c
diff options
context:
space:
mode:
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- Objects/unicodeobject.c 73
1 files changed, 49 insertions, 24 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index f3f0c9646a6..5c2308a0121 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -167,11 +167,7 @@ static inline void PyUnicode_SET_UTF8_LENGTH(PyObject *op, Py_ssize_t length)
#define _PyUnicode_HASH(op) \
(_PyASCIIObject_CAST(op)->hash)
-static inline Py_hash_t PyUnicode_HASH(PyObject *op)
-{
- assert(_PyUnicode_CHECK(op));
- return FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyASCIIObject_CAST(op)->hash);
-}
+#define PyUnicode_HASH PyUnstable_Unicode_GET_CACHED_HASH /* alias; presumably keeps existing PyUnicode_HASH() call sites in this file working -- confirm */
static inline void PyUnicode_SET_HASH(PyObject *op, Py_hash_t hash)
{
@@ -6596,13 +6592,15 @@ _PyUnicode_GetNameCAPI(void)
/* --- Unicode Escape Codec ----------------------------------------------- */
PyObject *
-_PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
+_PyUnicode_DecodeUnicodeEscapeInternal2(const char *s,
Py_ssize_t size,
const char *errors,
Py_ssize_t *consumed,
- const char **first_invalid_escape)
+ int *first_invalid_escape_char,
+ const char **first_invalid_escape_ptr)
{
const char *starts = s;
+ const char *initial_starts = starts;
_PyUnicodeWriter writer;
const char *end;
PyObject *errorHandler = NULL;
@@ -6610,7 +6608,8 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
_PyUnicode_Name_CAPI *ucnhash_capi;
// so we can remember if we've seen an invalid escape char or not
- *first_invalid_escape = NULL;
+ *first_invalid_escape_char = -1;
+ *first_invalid_escape_ptr = NULL;
if (size == 0) {
if (consumed) {
@@ -6698,9 +6697,12 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
}
}
if (ch > 0377) {
- if (*first_invalid_escape == NULL) {
- *first_invalid_escape = s-3; /* Back up 3 chars, since we've
- already incremented s. */
+ if (*first_invalid_escape_char == -1) {
+ *first_invalid_escape_char = ch;
+ if (starts == initial_starts) {
+ /* Back up 3 chars, since we've already incremented s. */
+ *first_invalid_escape_ptr = s - 3;
+ }
}
}
WRITE_CHAR(ch);
@@ -6795,9 +6797,12 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
goto error;
default:
- if (*first_invalid_escape == NULL) {
- *first_invalid_escape = s-1; /* Back up one char, since we've
- already incremented s. */
+ if (*first_invalid_escape_char == -1) {
+ *first_invalid_escape_char = c;
+ if (starts == initial_starts) {
+ /* Back up one char, since we've already incremented s. */
+ *first_invalid_escape_ptr = s - 1;
+ }
}
WRITE_ASCII_CHAR('\\');
WRITE_CHAR(c);
@@ -6842,19 +6847,20 @@ _PyUnicode_DecodeUnicodeEscapeStateful(const char *s,
const char *errors,
Py_ssize_t *consumed)
{
- const char *first_invalid_escape;
- PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal(s, size, errors,
+ int first_invalid_escape_char;
+ const char *first_invalid_escape_ptr;
+ PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal2(s, size, errors,
consumed,
- &first_invalid_escape);
+ &first_invalid_escape_char,
+ &first_invalid_escape_ptr);
if (result == NULL)
return NULL;
- if (first_invalid_escape != NULL) {
- unsigned char c = *first_invalid_escape;
- if ('4' <= c && c <= '7') {
+ if (first_invalid_escape_char != -1) {
+ if (first_invalid_escape_char > 0xff) {
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
- "\"\\%.3s\" is an invalid octal escape sequence. "
+ "\"\\%o\" is an invalid octal escape sequence. "
"Such sequences will not work in the future. ",
- first_invalid_escape) < 0)
+ first_invalid_escape_char) < 0)
{
Py_DECREF(result);
return NULL;
@@ -6864,7 +6870,7 @@ _PyUnicode_DecodeUnicodeEscapeStateful(const char *s,
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
"\"\\%c\" is an invalid escape sequence. "
"Such sequences will not work in the future. ",
- c) < 0)
+ first_invalid_escape_char) < 0)
{
Py_DECREF(result);
return NULL;
@@ -13919,7 +13925,12 @@ _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str)
int
PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj)
{
- if (Py_TYPE(obj) == &PyLong_Type) {
+ PyTypeObject *type = Py_TYPE(obj);
+ if (type == &PyUnicode_Type) {
+ return _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, obj);
+ }
+
+ if (type == &PyLong_Type) {
return _PyLong_FormatWriter((_PyUnicodeWriter*)writer, obj, 10, 0);
}
@@ -14068,6 +14079,20 @@ _PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
return 0;
}
+/* Public entry point that forwards (str, size) to the private ASCII
+ * writer helper.
+ *
+ * Asserts that `writer` is non-NULL and calls _Py_AssertHoldsTstate(),
+ * then casts the public PyUnicodeWriter handle to the private
+ * _PyUnicodeWriter and returns the result of
+ * _PyUnicodeWriter_WriteASCIIString() unchanged.
+ *
+ * NOTE(review): presumably `str` must contain only ASCII bytes, and the
+ * return value is 0 on success / -1 with an exception set on failure --
+ * confirm against _PyUnicodeWriter_WriteASCIIString(), whose body is not
+ * visible in this hunk.
+ */
+int
+PyUnicodeWriter_WriteASCII(PyUnicodeWriter *writer,
+ const char *str,
+ Py_ssize_t size)
+{
+ assert(writer != NULL);
+ _Py_AssertHoldsTstate();
+
+ _PyUnicodeWriter *priv_writer = (_PyUnicodeWriter*)writer;
+ return _PyUnicodeWriter_WriteASCIIString(priv_writer, str, size);
+}
+
+
int
PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer,
const char *str,