diff options
Diffstat (limited to 'Objects/stringlib')
-rw-r--r-- | Objects/stringlib/eq.h | 21 | ||||
-rw-r--r-- | Objects/stringlib/formatter.h | 47 | ||||
-rw-r--r-- | Objects/stringlib/string_format.h | 61 | ||||
-rw-r--r-- | Objects/stringlib/stringdefs.h | 22 | ||||
-rw-r--r-- | Objects/stringlib/unicodedefs.h | 3 |
5 files changed, 93 insertions, 61 deletions
diff --git a/Objects/stringlib/eq.h b/Objects/stringlib/eq.h new file mode 100644 index 00000000000..3e7f5e86c6c --- /dev/null +++ b/Objects/stringlib/eq.h @@ -0,0 +1,21 @@ +/* Fast unicode equal function optimized for dictobject.c and setobject.c */ + +/* Return 1 if two unicode objects are equal, 0 if not. + * unicode_eq() is called when the hash of two unicode objects is equal. + */ +Py_LOCAL_INLINE(int) +unicode_eq(PyObject *aa, PyObject *bb) +{ + register PyUnicodeObject *a = (PyUnicodeObject *)aa; + register PyUnicodeObject *b = (PyUnicodeObject *)bb; + + if (a->length != b->length) + return 0; + if (a->length == 0) + return 1; + if (a->str[0] != b->str[0]) + return 0; + if (a->length == 1) + return 1; + return memcmp(a->str, b->str, a->length * sizeof(Py_UNICODE)) == 0; +} diff --git a/Objects/stringlib/formatter.h b/Objects/stringlib/formatter.h index 6b282249b17..139b56cd8e0 100644 --- a/Objects/stringlib/formatter.h +++ b/Objects/stringlib/formatter.h @@ -774,14 +774,6 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format, goto done; } - /* Error to specify a comma. */ - if (format->thousands_separators) { - PyErr_SetString(PyExc_ValueError, - "Thousands separators not allowed with integer" - " format specifier 'c'"); - goto done; - } - /* taken from unicodeobject.c formatchar() */ /* Integer input truncated to a character */ /* XXX: won't work for int */ @@ -947,20 +939,16 @@ format_float_internal(PyObject *value, from a hard-code pseudo-locale */ LocaleInfo locale; - /* Alternate is not allowed on floats. */ - if (format->alternate) { - PyErr_SetString(PyExc_ValueError, - "Alternate form (#) not allowed in float format " - "specifier"); - goto done; - } + if (format->alternate) + flags |= Py_DTSF_ALT; if (type == '\0') { - /* Omitted type specifier. This is like 'g' but with at least one - digit after the decimal point, and different default precision.*/ - type = 'g'; - default_precision = PyFloat_STR_PRECISION; + /* Omitted type specifier. Behaves in the same way as repr(x) + and str(x) if no precision is given, else like 'g', but with + at least one digit after the decimal point. */ flags |= Py_DTSF_ADD_DOT_0; + type = 'r'; + default_precision = 0; } if (type == 'n') @@ -980,6 +968,8 @@ format_float_internal(PyObject *value, if (precision < 0) precision = default_precision; + else if (type == 'r') + type = 'g'; /* Cast "type", because if we're in unicode we need to pass a 8-bit char. This is safe, because we've restricted what "type" @@ -1107,15 +1097,7 @@ format_complex_internal(PyObject *value, from a hard-code pseudo-locale */ LocaleInfo locale; - /* Alternate is not allowed on complex. */ - if (format->alternate) { - PyErr_SetString(PyExc_ValueError, - "Alternate form (#) not allowed in complex format " - "specifier"); - goto done; - } - - /* Neither is zero pading. */ + /* Zero padding is not allowed. */ if (format->fill_char == '0') { PyErr_SetString(PyExc_ValueError, "Zero padding is not allowed in complex format " @@ -1138,10 +1120,13 @@ format_complex_internal(PyObject *value, if (im == -1.0 && PyErr_Occurred()) goto done; + if (format->alternate) + flags |= Py_DTSF_ALT; + if (type == '\0') { /* Omitted type specifier. Should be like str(self). */ - type = 'g'; - default_precision = PyFloat_STR_PRECISION; + type = 'r'; + default_precision = 0; if (re == 0.0 && copysign(1.0, re) == 1.0) skip_re = 1; else @@ -1155,6 +1140,8 @@ format_complex_internal(PyObject *value, if (precision < 0) precision = default_precision; + else if (type == 'r') + type = 'g'; /* Cast "type", because if we're in unicode we need to pass a 8-bit char. This is safe, because we've restricted what "type" diff --git a/Objects/stringlib/string_format.h b/Objects/stringlib/string_format.h index 965e1ad0ef3..c46bdc2656e 100644 --- a/Objects/stringlib/string_format.h +++ b/Objects/stringlib/string_format.h @@ -496,15 +496,28 @@ get_field_object(SubString *input, PyObject *args, PyObject *kwargs, PyObject *key = SubString_new_object(&first); if (key == NULL) goto error; - if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) { + + /* Use PyObject_GetItem instead of PyDict_GetItem because this + code is no longer just used with kwargs. It might be passed + a non-dict when called through format_map. */ + if ((kwargs == NULL) || (obj = PyObject_GetItem(kwargs, key)) == NULL) { PyErr_SetObject(PyExc_KeyError, key); Py_DECREF(key); goto error; } Py_DECREF(key); - Py_INCREF(obj); } else { + /* If args is NULL, we have a format string with a positional field + with only kwargs to retrieve it from. This can only happen when + used with format_map(), where positional arguments are not + allowed. */ + if (args == NULL) { + PyErr_SetString(PyExc_ValueError, "Format string contains " + "positional fields"); + goto error; + } + /* look up in args */ obj = PySequence_GetItem(args, index); if (obj == NULL) @@ -570,24 +583,15 @@ render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output) /* If we know the type exactly, skip the lookup of __format__ and just call the formatter directly. */ -#if STRINGLIB_IS_UNICODE if (PyUnicode_CheckExact(fieldobj)) formatter = _PyUnicode_FormatAdvanced; - /* Unfortunately, there's a problem with checking for int, long, - and float here. If we're being included as unicode, their - formatters expect string format_spec args. For now, just skip - this optimization for unicode. This could be fixed, but it's a - hassle. */ -#else - if (PyString_CheckExact(fieldobj)) - formatter = _PyBytes_FormatAdvanced; - else if (PyInt_CheckExact(fieldobj)) - formatter =_PyInt_FormatAdvanced; else if (PyLong_CheckExact(fieldobj)) formatter =_PyLong_FormatAdvanced; else if (PyFloat_CheckExact(fieldobj)) formatter = _PyFloat_FormatAdvanced; -#endif + + /* XXX: for 2.6, convert format_spec to the appropriate type + (unicode, str) */ if (formatter) { /* we know exactly which formatter will be called when __format__ is @@ -610,7 +614,7 @@ render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output) #if PY_VERSION_HEX >= 0x03000000 assert(PyUnicode_Check(result)); #else - assert(PyString_Check(result) || PyUnicode_Check(result)); + assert(PyBytes_Check(result) || PyUnicode_Check(result)); /* Convert result to our type. We could be str, and result could be unicode */ @@ -849,6 +853,10 @@ do_conversion(PyObject *obj, STRINGLIB_CHAR conversion) return PyObject_Repr(obj); case 's': return STRINGLIB_TOSTR(obj); +#if PY_VERSION_HEX >= 0x03000000 + case 'a': + return STRINGLIB_TOASCII(obj); +#endif default: if (conversion > 32 && conversion < 127) { /* It's the ASCII subrange; casting to char is safe @@ -1041,6 +1049,11 @@ do_string_format(PyObject *self, PyObject *args, PyObject *kwargs) return build_string(&input, args, kwargs, recursion_depth, &auto_number); } +static PyObject * +do_string_format_map(PyObject *self, PyObject *obj) +{ + return do_string_format(self, NULL, obj); +} /************************************************************************/ @@ -1154,7 +1167,7 @@ static PyTypeObject PyFormatterIter_Type = { 0, /* tp_print */ 0, /* tp_getattr */ 0, /* tp_setattr */ - 0, /* tp_compare */ + 0, /* tp_reserved */ 0, /* tp_repr */ 0, /* tp_as_number */ 0, /* tp_as_sequence */ @@ -1182,10 +1195,15 @@ static PyTypeObject PyFormatterIter_Type = { describing the parsed elements. It's a wrapper around stringlib/string_format.h's MarkupIterator */ static PyObject * -formatter_parser(STRINGLIB_OBJECT *self) +formatter_parser(PyObject *ignored, STRINGLIB_OBJECT *self) { formatteriterobject *it; + if (!PyUnicode_Check(self)) { + PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name); + return NULL; + } + it = PyObject_New(formatteriterobject, &PyFormatterIter_Type); if (it == NULL) return NULL; @@ -1287,7 +1305,7 @@ static PyTypeObject PyFieldNameIter_Type = { 0, /* tp_print */ 0, /* tp_getattr */ 0, /* tp_setattr */ - 0, /* tp_compare */ + 0, /* tp_reserved */ 0, /* tp_repr */ 0, /* tp_as_number */ 0, /* tp_as_sequence */ @@ -1317,7 +1335,7 @@ static PyTypeObject PyFieldNameIter_Type = { field_name_split. The iterator it returns is a FieldNameIterator */ static PyObject * -formatter_field_name_split(STRINGLIB_OBJECT *self) +formatter_field_name_split(PyObject *ignored, STRINGLIB_OBJECT *self) { SubString first; Py_ssize_t first_idx; @@ -1326,6 +1344,11 @@ formatter_field_name_split(STRINGLIB_OBJECT *self) PyObject *first_obj = NULL; PyObject *result = NULL; + if (!PyUnicode_Check(self)) { + PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name); + return NULL; + } + it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type); if (it == NULL) return NULL; diff --git a/Objects/stringlib/stringdefs.h b/Objects/stringlib/stringdefs.h index 84e461628e1..1c49426ff64 100644 --- a/Objects/stringlib/stringdefs.h +++ b/Objects/stringlib/stringdefs.h @@ -6,7 +6,7 @@ compiled as unicode. */ #define STRINGLIB_IS_UNICODE 0 -#define STRINGLIB_OBJECT PyStringObject +#define STRINGLIB_OBJECT PyBytesObject #define STRINGLIB_CHAR char #define STRINGLIB_TYPE_NAME "string" #define STRINGLIB_PARSE_CODE "S" @@ -18,16 +18,14 @@ #define STRINGLIB_TOUPPER Py_TOUPPER #define STRINGLIB_TOLOWER Py_TOLOWER #define STRINGLIB_FILL memset -#define STRINGLIB_STR PyString_AS_STRING -#define STRINGLIB_LEN PyString_GET_SIZE -#define STRINGLIB_NEW PyString_FromStringAndSize -#define STRINGLIB_RESIZE _PyString_Resize -#define STRINGLIB_CHECK PyString_Check -#define STRINGLIB_CHECK_EXACT PyString_CheckExact +#define STRINGLIB_STR PyBytes_AS_STRING +#define STRINGLIB_LEN PyBytes_GET_SIZE +#define STRINGLIB_NEW PyBytes_FromStringAndSize +#define STRINGLIB_RESIZE _PyBytes_Resize +#define STRINGLIB_CHECK PyBytes_Check +#define STRINGLIB_CHECK_EXACT PyBytes_CheckExact #define STRINGLIB_TOSTR PyObject_Str -#define STRINGLIB_GROUPING _PyString_InsertThousandsGrouping -#define STRINGLIB_GROUPING_LOCALE _PyString_InsertThousandsGroupingLocale - -#define STRINGLIB_WANT_CONTAINS_OBJ 1 - +#define STRINGLIB_GROUPING _PyBytes_InsertThousandsGrouping +#define STRINGLIB_GROUPING_LOCALE _PyBytes_InsertThousandsGroupingLocale +#define STRINGLIB_TOASCII PyObject_Repr #endif /* !STRINGLIB_STRINGDEFS_H */ diff --git a/Objects/stringlib/unicodedefs.h b/Objects/stringlib/unicodedefs.h index dd814f6c90e..09dae6dc9ed 100644 --- a/Objects/stringlib/unicodedefs.h +++ b/Objects/stringlib/unicodedefs.h @@ -25,11 +25,14 @@ #define STRINGLIB_CHECK PyUnicode_Check #define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact #define STRINGLIB_GROUPING _PyUnicode_InsertThousandsGrouping +#define STRINGLIB_GROUPING_LOCALE _PyUnicode_InsertThousandsGroupingLocale #if PY_VERSION_HEX < 0x03000000 #define STRINGLIB_TOSTR PyObject_Unicode +#define STRINGLIB_TOASCII PyObject_Repr #else #define STRINGLIB_TOSTR PyObject_Str +#define STRINGLIB_TOASCII PyObject_ASCII #endif #define STRINGLIB_WANT_CONTAINS_OBJ 1 |