about | summary | refs | log | tree | commit | diff | stats | homepage
path: root/Objects/unicodeobject.c
diff options
context:
space:
mode:
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- Objects/unicodeobject.c 13
1 files changed, 11 insertions, 2 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 279cdaa668e..d11a9dca14b 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2581,6 +2581,7 @@ unicode_fromformat_write_utf8(_PyUnicodeWriter *writer, const char *str,
Py_ssize_t width, Py_ssize_t precision, int flags)
{
/* UTF-8 */
+ Py_ssize_t *pconsumed = NULL;
Py_ssize_t length;
if (precision == -1) {
length = strlen(str);
@@ -2590,15 +2591,23 @@ unicode_fromformat_write_utf8(_PyUnicodeWriter *writer, const char *str,
while (length < precision && str[length]) {
length++;
}
+ if (length == precision) {
+ /* The input string is not NUL-terminated. If it ends with an
+ * incomplete UTF-8 sequence, truncate the string just before it.
+ * Incomplete sequences in the middle and sequences which cannot
+ * be valid prefixes are still treated as errors and replaced
+ * with U+FFFD (REPLACEMENT CHARACTER). */
+ pconsumed = &length;
+ }
}
if (width < 0) {
return unicode_decode_utf8_writer(writer, str, length,
- _Py_ERROR_REPLACE, "replace", NULL);
+ _Py_ERROR_REPLACE, "replace", pconsumed);
}
PyObject *unicode = PyUnicode_DecodeUTF8Stateful(str, length,
- "replace", NULL);
+ "replace", pconsumed);
if (unicode == NULL)
return -1;