diff options
author | Sergey B Kirpichev <skirpichev@gmail.com> | 2025-02-25 18:27:07 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-02-25 16:27:07 +0100 |
commit | f39a07be47cd9219eaf0e538ae32ad8239c88e66 (patch) | |
tree | 1b57568378401b44c9ffd8d3f237b13694842b5c /Python | |
parent | fa6a8140dd2a72da6df2a7dfafbf07045debf64d (diff) | |
download | cpython-f39a07be47cd9219eaf0e538ae32ad8239c88e66.tar.gz cpython-f39a07be47cd9219eaf0e538ae32ad8239c88e66.zip |
gh-87790: support thousands separators for formatting fractional part of floats (#125304)
```pycon
>>> f"{123_456.123_456:_._f}" # Whole and fractional
'123_456.123_456'
>>> f"{123_456.123_456:_f}" # Integer component only
'123_456.123456'
>>> f"{123_456.123_456:._f}" # Fractional component only
'123456.123_456'
>>> f"{123_456.123_456:.4_f}" # with precision
'123456.1_235'
```
Diffstat (limited to 'Python')
-rw-r--r-- | Python/formatter_unicode.c | 160 |
1 files changed, 131 insertions, 29 deletions
diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c index 16f71118499..4f8662b0a4c 100644 --- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -135,6 +135,7 @@ typedef struct { Py_ssize_t width; enum LocaleType thousands_separators; Py_ssize_t precision; + enum LocaleType frac_thousands_separator; Py_UCS4 type; } InternalFormatSpec; @@ -171,6 +172,7 @@ parse_internal_render_format_spec(PyObject *obj, format->sign = '\0'; format->width = -1; format->thousands_separators = LT_NO_LOCALE; + format->frac_thousands_separator = LT_NO_LOCALE; format->precision = -1; format->type = default_type; @@ -260,7 +262,35 @@ parse_internal_render_format_spec(PyObject *obj, /* Overflow error. Exception already set. */ return 0; - /* Not having a precision after a dot is an error. */ + if (end-pos && READ_spec(pos) == ',') { + if (consumed == 0) { + format->precision = -1; + } + format->frac_thousands_separator = LT_DEFAULT_LOCALE; + ++pos; + ++consumed; + } + if (end-pos && READ_spec(pos) == '_') { + if (format->frac_thousands_separator != LT_NO_LOCALE) { + invalid_comma_and_underscore(); + return 0; + } + if (consumed == 0) { + format->precision = -1; + } + format->frac_thousands_separator = LT_UNDERSCORE_LOCALE; + ++pos; + ++consumed; + } + if (end-pos && READ_spec(pos) == ',') { + if (format->frac_thousands_separator == LT_UNDERSCORE_LOCALE) { + invalid_comma_and_underscore(); + return 0; + } + } + + /* Not having a precision or underscore/comma after a dot + is an error. */ if (consumed == 0) { PyErr_Format(PyExc_ValueError, "Format specifier missing precision"); @@ -327,6 +357,14 @@ parse_internal_render_format_spec(PyObject *obj, } } + if (format->type == 'n' + && format->frac_thousands_separator != LT_NO_LOCALE) + { + invalid_thousands_separator_type(format->frac_thousands_separator, + format->type); + return 0; + } + assert (format->align <= 127); assert (format->sign <= 127); return 1; @@ -402,6 +440,7 @@ fill_padding(_PyUnicodeWriter *writer, typedef struct { PyObject *decimal_point; PyObject *thousands_sep; + PyObject *frac_thousands_sep; const char *grouping; char *grouping_buffer; } LocaleInfo; @@ -423,6 +462,8 @@ typedef struct { Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part, excluding the decimal itself, if present. */ + Py_ssize_t n_frac; + Py_ssize_t n_grouped_frac_digits; /* These 2 are not the widths of fields, but are needed by STRINGLIB_GROUPING. */ @@ -445,24 +486,32 @@ typedef struct { */ static void parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end, - Py_ssize_t *n_remainder, int *has_decimal) + Py_ssize_t *n_remainder, Py_ssize_t *n_frac, int *has_decimal) { - Py_ssize_t remainder; + Py_ssize_t frac; int kind = PyUnicode_KIND(s); const void *data = PyUnicode_DATA(s); - while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos))) + while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos))) { ++pos; - remainder = pos; + } + frac = pos; /* Does remainder start with a decimal point? */ - *has_decimal = pos<end && PyUnicode_READ(kind, data, remainder) == '.'; + *has_decimal = pos<end && PyUnicode_READ(kind, data, frac) == '.'; /* Skip the decimal point. */ - if (*has_decimal) - remainder++; + if (*has_decimal) { + frac++; + pos++; + } + + while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos))) { + ++pos; + } - *n_remainder = end - remainder; + *n_frac = pos - frac; + *n_remainder = end - pos; } /* not all fields of format are used. for example, precision is @@ -473,18 +522,19 @@ parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end, static Py_ssize_t calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix, Py_UCS4 sign_char, Py_ssize_t n_start, - Py_ssize_t n_end, Py_ssize_t n_remainder, + Py_ssize_t n_end, Py_ssize_t n_remainder, Py_ssize_t n_frac, int has_decimal, const LocaleInfo *locale, const InternalFormatSpec *format, Py_UCS4 *maxchar) { Py_ssize_t n_non_digit_non_padding; Py_ssize_t n_padding; - spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0); + spec->n_digits = n_end - n_start - n_frac - n_remainder - (has_decimal?1:0); spec->n_lpadding = 0; spec->n_prefix = n_prefix; spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0; spec->n_remainder = n_remainder; + spec->n_frac = n_frac; spec->n_spadding = 0; spec->n_rpadding = 0; spec->sign = '\0'; @@ -530,7 +580,7 @@ calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix, /* The number of chars used for non-digits and non-padding. */ n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal + - spec->n_remainder; + + spec->n_frac + spec->n_remainder; /* min_width can go negative, that's okay. format->width == -1 means we don't care. */ @@ -550,19 +600,36 @@ calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix, NULL, 0, NULL, 0, spec->n_digits, spec->n_min_width, - locale->grouping, locale->thousands_sep, &grouping_maxchar); + locale->grouping, locale->thousands_sep, &grouping_maxchar, 0); if (spec->n_grouped_digits == -1) { return -1; } *maxchar = Py_MAX(*maxchar, grouping_maxchar); } + if (spec->n_frac == 0) { + spec->n_grouped_frac_digits = 0; + } + else { + Py_UCS4 grouping_maxchar; + spec->n_grouped_frac_digits = _PyUnicode_InsertThousandsGrouping( + NULL, 0, + NULL, 0, spec->n_frac, + spec->n_frac, + locale->grouping, locale->frac_thousands_sep, &grouping_maxchar, 1); + if (spec->n_grouped_frac_digits == -1) { + return -1; + } + *maxchar = Py_MAX(*maxchar, grouping_maxchar); + } + /* Given the desired width and the total of digit and non-digit space we consume, see if we need any padding. format->width can be negative (meaning no padding), but this code still works in that case. */ n_padding = format->width - - (n_non_digit_non_padding + spec->n_grouped_digits); + (n_non_digit_non_padding + spec->n_grouped_digits + + spec->n_grouped_frac_digits - spec->n_frac); if (n_padding > 0) { /* Some padding is needed. Determine if it's left, space, or right. */ switch (format->align) { @@ -593,7 +660,7 @@ calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix, return spec->n_lpadding + spec->n_sign + spec->n_prefix + spec->n_spadding + spec->n_grouped_digits + spec->n_decimal + - spec->n_remainder + spec->n_rpadding; + spec->n_grouped_frac_digits + spec->n_remainder + spec->n_rpadding; } /* Fill in the digit parts of a number's string representation, @@ -649,7 +716,7 @@ fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec, writer, spec->n_grouped_digits, digits, d_pos, spec->n_digits, spec->n_min_width, - locale->grouping, locale->thousands_sep, NULL); + locale->grouping, locale->thousands_sep, NULL, 0); if (r == -1) return -1; assert(r == spec->n_grouped_digits); @@ -677,6 +744,19 @@ fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec, d_pos += 1; } + if (spec->n_frac) { + r = _PyUnicode_InsertThousandsGrouping( + writer, spec->n_grouped_frac_digits, + digits, d_pos, spec->n_frac, spec->n_frac, + locale->grouping, locale->frac_thousands_sep, NULL, 1); + if (r == -1) { + return -1; + } + assert(r == spec->n_grouped_frac_digits); + d_pos += spec->n_frac; + writer->pos += spec->n_grouped_frac_digits; + } + if (spec->n_remainder) { _PyUnicode_FastCopyCharacters( writer->buffer, writer->pos, @@ -701,7 +781,8 @@ static const char no_grouping[1] = {CHAR_MAX}; LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */ static int -get_locale_info(enum LocaleType type, LocaleInfo *locale_info) +get_locale_info(enum LocaleType type, enum LocaleType frac_type, + LocaleInfo *locale_info) { switch (type) { case LT_CURRENT_LOCALE: { @@ -746,6 +827,19 @@ get_locale_info(enum LocaleType type, LocaleInfo *locale_info) locale_info->grouping = no_grouping; break; } + if (frac_type != LT_NO_LOCALE) { + locale_info->frac_thousands_sep = PyUnicode_FromOrdinal( + frac_type == LT_DEFAULT_LOCALE ? ',' : '_'); + if (!locale_info->frac_thousands_sep) { + return -1; + } + if (locale_info->grouping == no_grouping) { + locale_info->grouping = "\3"; + } + } + else { + locale_info->frac_thousands_sep = Py_GetConstant(Py_CONSTANT_EMPTY_STR); + } return 0; } @@ -754,6 +848,7 @@ free_locale_info(LocaleInfo *locale_info) { Py_XDECREF(locale_info->decimal_point); Py_XDECREF(locale_info->thousands_sep); + Py_XDECREF(locale_info->frac_thousands_sep); PyMem_Free(locale_info->grouping_buffer); } @@ -1005,13 +1100,13 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format, /* Determine the grouping, separator, and decimal point, if any. */ if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : - format->thousands_separators, + format->thousands_separators, 0, &locale) == -1) goto done; /* Calculate how much memory we'll need. */ n_total = calc_number_widths(&spec, n_prefix, sign_char, inumeric_chars, - inumeric_chars + n_digits, n_remainder, 0, + inumeric_chars + n_digits, n_remainder, 0, 0, &locale, format, &maxchar); if (n_total == -1) { goto done; @@ -1046,6 +1141,7 @@ format_float_internal(PyObject *value, char *buf = NULL; /* buffer returned from PyOS_double_to_string */ Py_ssize_t n_digits; Py_ssize_t n_remainder; + Py_ssize_t n_frac; Py_ssize_t n_total; int has_decimal; double val; @@ -1125,7 +1221,8 @@ format_float_internal(PyObject *value, if (format->sign != '+' && format->sign != ' ' && format->width == -1 && format->type != 'n' - && !format->thousands_separators) + && !format->thousands_separators + && !format->frac_thousands_separator) { /* Fast path */ result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits); @@ -1151,18 +1248,20 @@ format_float_internal(PyObject *value, /* Determine if we have any "remainder" (after the digits, might include decimal or exponent or both (or neither)) */ - parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal); + parse_number(unicode_tmp, index, index + n_digits, + &n_remainder, &n_frac, &has_decimal); /* Determine the grouping, separator, and decimal point, if any. */ if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : format->thousands_separators, + format->frac_thousands_separator, &locale) == -1) goto done; /* Calculate how much memory we'll need. */ n_total = calc_number_widths(&spec, 0, sign_char, index, - index + n_digits, n_remainder, has_decimal, - &locale, format, &maxchar); + index + n_digits, n_remainder, n_frac, + has_decimal, &locale, format, &maxchar); if (n_total == -1) { goto done; } @@ -1202,6 +1301,8 @@ format_complex_internal(PyObject *value, Py_ssize_t n_im_digits; Py_ssize_t n_re_remainder; Py_ssize_t n_im_remainder; + Py_ssize_t n_re_frac; + Py_ssize_t n_im_frac; Py_ssize_t n_re_total; Py_ssize_t n_im_total; int re_has_decimal; @@ -1330,13 +1431,14 @@ format_complex_internal(PyObject *value, /* Determine if we have any "remainder" (after the digits, might include decimal or exponent or both (or neither)) */ parse_number(re_unicode_tmp, i_re, i_re + n_re_digits, - &n_re_remainder, &re_has_decimal); + &n_re_remainder, &n_re_frac, &re_has_decimal); parse_number(im_unicode_tmp, i_im, i_im + n_im_digits, - &n_im_remainder, &im_has_decimal); + &n_im_remainder, &n_im_frac, &im_has_decimal); /* Determine the grouping, separator, and decimal point, if any. */ if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : format->thousands_separators, + format->frac_thousands_separator, &locale) == -1) goto done; @@ -1349,8 +1451,8 @@ format_complex_internal(PyObject *value, /* Calculate how much memory we'll need. */ n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, i_re, i_re + n_re_digits, n_re_remainder, - re_has_decimal, &locale, &tmp_format, - &maxchar); + n_re_frac, re_has_decimal, &locale, + &tmp_format, &maxchar); if (n_re_total == -1) { goto done; } @@ -1362,8 +1464,8 @@ format_complex_internal(PyObject *value, tmp_format.sign = '+'; n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, i_im, i_im + n_im_digits, n_im_remainder, - im_has_decimal, &locale, &tmp_format, - &maxchar); + n_im_frac, im_has_decimal, &locale, + &tmp_format, &maxchar); if (n_im_total == -1) { goto done; } |