aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/Objects/stringlib
diff options
context:
space:
mode:
Diffstat (limited to 'Objects/stringlib')
-rw-r--r-- Objects/stringlib/eq.h 21
-rw-r--r-- Objects/stringlib/formatter.h 47
-rw-r--r-- Objects/stringlib/string_format.h 61
-rw-r--r-- Objects/stringlib/stringdefs.h 22
-rw-r--r-- Objects/stringlib/unicodedefs.h 3
5 files changed, 93 insertions, 61 deletions
diff --git a/Objects/stringlib/eq.h b/Objects/stringlib/eq.h
new file mode 100644
index 00000000000..3e7f5e86c6c
--- /dev/null
+++ b/Objects/stringlib/eq.h
@@ -0,0 +1,21 @@
+/* Fast unicode equal function optimized for dictobject.c and setobject.c */
+
+/* Return 1 if two unicode objects are equal, 0 if not.
+ * unicode_eq() is called when the hash of two unicode objects is equal.
+ */
+Py_LOCAL_INLINE(int)
+unicode_eq(PyObject *aa, PyObject *bb)
+{
+ register PyUnicodeObject *a = (PyUnicodeObject *)aa; /* plain cast: no type check is performed here */
+ register PyUnicodeObject *b = (PyUnicodeObject *)bb; /* plain cast: no type check is performed here */
+
+ if (a->length != b->length) /* different lengths can never compare equal */
+ return 0;
+ if (a->length == 0) /* both are empty (lengths already known to match) */
+ return 1;
+ if (a->str[0] != b->str[0]) /* cheap first-character test before the full memcmp */
+ return 0;
+ if (a->length == 1) /* the only character was compared just above */
+ return 1;
+ return memcmp(a->str, b->str, a->length * sizeof(Py_UNICODE)) == 0; /* byte-wise compare of the whole Py_UNICODE buffers */
+}
diff --git a/Objects/stringlib/formatter.h b/Objects/stringlib/formatter.h
index 6b282249b17..139b56cd8e0 100644
--- a/Objects/stringlib/formatter.h
+++ b/Objects/stringlib/formatter.h
@@ -774,14 +774,6 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
goto done;
}
- /* Error to specify a comma. */
- if (format->thousands_separators) {
- PyErr_SetString(PyExc_ValueError,
- "Thousands separators not allowed with integer"
- " format specifier 'c'");
- goto done;
- }
-
/* taken from unicodeobject.c formatchar() */
/* Integer input truncated to a character */
/* XXX: won't work for int */
@@ -947,20 +939,16 @@ format_float_internal(PyObject *value,
from a hard-code pseudo-locale */
LocaleInfo locale;
- /* Alternate is not allowed on floats. */
- if (format->alternate) {
- PyErr_SetString(PyExc_ValueError,
- "Alternate form (#) not allowed in float format "
- "specifier");
- goto done;
- }
+ if (format->alternate)
+ flags |= Py_DTSF_ALT;
if (type == '\0') {
- /* Omitted type specifier. This is like 'g' but with at least one
- digit after the decimal point, and different default precision.*/
- type = 'g';
- default_precision = PyFloat_STR_PRECISION;
+ /* Omitted type specifier. Behaves in the same way as repr(x)
+ and str(x) if no precision is given, else like 'g', but with
+ at least one digit after the decimal point. */
flags |= Py_DTSF_ADD_DOT_0;
+ type = 'r';
+ default_precision = 0;
}
if (type == 'n')
@@ -980,6 +968,8 @@ format_float_internal(PyObject *value,
if (precision < 0)
precision = default_precision;
+ else if (type == 'r')
+ type = 'g';
/* Cast "type", because if we're in unicode we need to pass a
8-bit char. This is safe, because we've restricted what "type"
@@ -1107,15 +1097,7 @@ format_complex_internal(PyObject *value,
from a hard-code pseudo-locale */
LocaleInfo locale;
- /* Alternate is not allowed on complex. */
- if (format->alternate) {
- PyErr_SetString(PyExc_ValueError,
- "Alternate form (#) not allowed in complex format "
- "specifier");
- goto done;
- }
-
- /* Neither is zero pading. */
+ /* Zero padding is not allowed. */
if (format->fill_char == '0') {
PyErr_SetString(PyExc_ValueError,
"Zero padding is not allowed in complex format "
@@ -1138,10 +1120,13 @@ format_complex_internal(PyObject *value,
if (im == -1.0 && PyErr_Occurred())
goto done;
+ if (format->alternate)
+ flags |= Py_DTSF_ALT;
+
if (type == '\0') {
/* Omitted type specifier. Should be like str(self). */
- type = 'g';
- default_precision = PyFloat_STR_PRECISION;
+ type = 'r';
+ default_precision = 0;
if (re == 0.0 && copysign(1.0, re) == 1.0)
skip_re = 1;
else
@@ -1155,6 +1140,8 @@ format_complex_internal(PyObject *value,
if (precision < 0)
precision = default_precision;
+ else if (type == 'r')
+ type = 'g';
/* Cast "type", because if we're in unicode we need to pass a
8-bit char. This is safe, because we've restricted what "type"
diff --git a/Objects/stringlib/string_format.h b/Objects/stringlib/string_format.h
index 965e1ad0ef3..c46bdc2656e 100644
--- a/Objects/stringlib/string_format.h
+++ b/Objects/stringlib/string_format.h
@@ -496,15 +496,28 @@ get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
PyObject *key = SubString_new_object(&first);
if (key == NULL)
goto error;
- if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) {
+
+ /* Use PyObject_GetItem instead of PyDict_GetItem because this
+ code is no longer just used with kwargs. It might be passed
+ a non-dict when called through format_map. */
+ if ((kwargs == NULL) || (obj = PyObject_GetItem(kwargs, key)) == NULL) {
PyErr_SetObject(PyExc_KeyError, key);
Py_DECREF(key);
goto error;
}
Py_DECREF(key);
- Py_INCREF(obj);
}
else {
+ /* If args is NULL, we have a format string with a positional field
+ with only kwargs to retrieve it from. This can only happen when
+ used with format_map(), where positional arguments are not
+ allowed. */
+ if (args == NULL) {
+ PyErr_SetString(PyExc_ValueError, "Format string contains "
+ "positional fields");
+ goto error;
+ }
+
/* look up in args */
obj = PySequence_GetItem(args, index);
if (obj == NULL)
@@ -570,24 +583,15 @@ render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output)
/* If we know the type exactly, skip the lookup of __format__ and just
call the formatter directly. */
-#if STRINGLIB_IS_UNICODE
if (PyUnicode_CheckExact(fieldobj))
formatter = _PyUnicode_FormatAdvanced;
- /* Unfortunately, there's a problem with checking for int, long,
- and float here. If we're being included as unicode, their
- formatters expect string format_spec args. For now, just skip
- this optimization for unicode. This could be fixed, but it's a
- hassle. */
-#else
- if (PyString_CheckExact(fieldobj))
- formatter = _PyBytes_FormatAdvanced;
- else if (PyInt_CheckExact(fieldobj))
- formatter =_PyInt_FormatAdvanced;
else if (PyLong_CheckExact(fieldobj))
formatter =_PyLong_FormatAdvanced;
else if (PyFloat_CheckExact(fieldobj))
formatter = _PyFloat_FormatAdvanced;
-#endif
+
+ /* XXX: for 2.6, convert format_spec to the appropriate type
+ (unicode, str) */
if (formatter) {
/* we know exactly which formatter will be called when __format__ is
@@ -610,7 +614,7 @@ render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output)
#if PY_VERSION_HEX >= 0x03000000
assert(PyUnicode_Check(result));
#else
- assert(PyString_Check(result) || PyUnicode_Check(result));
+ assert(PyBytes_Check(result) || PyUnicode_Check(result));
/* Convert result to our type. We could be str, and result could
be unicode */
@@ -849,6 +853,10 @@ do_conversion(PyObject *obj, STRINGLIB_CHAR conversion)
return PyObject_Repr(obj);
case 's':
return STRINGLIB_TOSTR(obj);
+#if PY_VERSION_HEX >= 0x03000000
+ case 'a':
+ return STRINGLIB_TOASCII(obj);
+#endif
default:
if (conversion > 32 && conversion < 127) {
/* It's the ASCII subrange; casting to char is safe
@@ -1041,6 +1049,11 @@ do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
return build_string(&input, args, kwargs, recursion_depth, &auto_number);
}
+static PyObject *
+do_string_format_map(PyObject *self, PyObject *obj)
+{ /* thin wrapper: forwards to do_string_format() with no positional arguments */
+ return do_string_format(self, NULL, obj); /* args is NULL; obj is passed in the kwargs position */
+}
/************************************************************************/
@@ -1154,7 +1167,7 @@ static PyTypeObject PyFormatterIter_Type = {
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
- 0, /* tp_compare */
+ 0, /* tp_reserved */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
@@ -1182,10 +1195,15 @@ static PyTypeObject PyFormatterIter_Type = {
describing the parsed elements. It's a wrapper around
stringlib/string_format.h's MarkupIterator */
static PyObject *
-formatter_parser(STRINGLIB_OBJECT *self)
+formatter_parser(PyObject *ignored, STRINGLIB_OBJECT *self)
{
formatteriterobject *it;
+ if (!PyUnicode_Check(self)) {
+ PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
+ return NULL;
+ }
+
it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
if (it == NULL)
return NULL;
@@ -1287,7 +1305,7 @@ static PyTypeObject PyFieldNameIter_Type = {
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
- 0, /* tp_compare */
+ 0, /* tp_reserved */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
@@ -1317,7 +1335,7 @@ static PyTypeObject PyFieldNameIter_Type = {
field_name_split. The iterator it returns is a
FieldNameIterator */
static PyObject *
-formatter_field_name_split(STRINGLIB_OBJECT *self)
+formatter_field_name_split(PyObject *ignored, STRINGLIB_OBJECT *self)
{
SubString first;
Py_ssize_t first_idx;
@@ -1326,6 +1344,11 @@ formatter_field_name_split(STRINGLIB_OBJECT *self)
PyObject *first_obj = NULL;
PyObject *result = NULL;
+ if (!PyUnicode_Check(self)) {
+ PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
+ return NULL;
+ }
+
it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
if (it == NULL)
return NULL;
diff --git a/Objects/stringlib/stringdefs.h b/Objects/stringlib/stringdefs.h
index 84e461628e1..1c49426ff64 100644
--- a/Objects/stringlib/stringdefs.h
+++ b/Objects/stringlib/stringdefs.h
@@ -6,7 +6,7 @@
compiled as unicode. */
#define STRINGLIB_IS_UNICODE 0
-#define STRINGLIB_OBJECT PyStringObject
+#define STRINGLIB_OBJECT PyBytesObject
#define STRINGLIB_CHAR char
#define STRINGLIB_TYPE_NAME "string"
#define STRINGLIB_PARSE_CODE "S"
@@ -18,16 +18,14 @@
#define STRINGLIB_TOUPPER Py_TOUPPER
#define STRINGLIB_TOLOWER Py_TOLOWER
#define STRINGLIB_FILL memset
-#define STRINGLIB_STR PyString_AS_STRING
-#define STRINGLIB_LEN PyString_GET_SIZE
-#define STRINGLIB_NEW PyString_FromStringAndSize
-#define STRINGLIB_RESIZE _PyString_Resize
-#define STRINGLIB_CHECK PyString_Check
-#define STRINGLIB_CHECK_EXACT PyString_CheckExact
+#define STRINGLIB_STR PyBytes_AS_STRING
+#define STRINGLIB_LEN PyBytes_GET_SIZE
+#define STRINGLIB_NEW PyBytes_FromStringAndSize
+#define STRINGLIB_RESIZE _PyBytes_Resize
+#define STRINGLIB_CHECK PyBytes_Check
+#define STRINGLIB_CHECK_EXACT PyBytes_CheckExact
#define STRINGLIB_TOSTR PyObject_Str
-#define STRINGLIB_GROUPING _PyString_InsertThousandsGrouping
-#define STRINGLIB_GROUPING_LOCALE _PyString_InsertThousandsGroupingLocale
-
-#define STRINGLIB_WANT_CONTAINS_OBJ 1
-
+#define STRINGLIB_GROUPING _PyBytes_InsertThousandsGrouping
+#define STRINGLIB_GROUPING_LOCALE _PyBytes_InsertThousandsGroupingLocale
+#define STRINGLIB_TOASCII PyObject_Repr
#endif /* !STRINGLIB_STRINGDEFS_H */
diff --git a/Objects/stringlib/unicodedefs.h b/Objects/stringlib/unicodedefs.h
index dd814f6c90e..09dae6dc9ed 100644
--- a/Objects/stringlib/unicodedefs.h
+++ b/Objects/stringlib/unicodedefs.h
@@ -25,11 +25,14 @@
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
#define STRINGLIB_GROUPING _PyUnicode_InsertThousandsGrouping
+#define STRINGLIB_GROUPING_LOCALE _PyUnicode_InsertThousandsGroupingLocale
#if PY_VERSION_HEX < 0x03000000
#define STRINGLIB_TOSTR PyObject_Unicode
+#define STRINGLIB_TOASCII PyObject_Repr
#else
#define STRINGLIB_TOSTR PyObject_Str
+#define STRINGLIB_TOASCII PyObject_ASCII
#endif
#define STRINGLIB_WANT_CONTAINS_OBJ 1