aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/Objects/stringlib/repr.h
blob: 87b1a8ba629dc675c6198a9d214b830b23017f5d (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
/* stringlib: repr() implementation */

/* Include-order guard: stop compilation with an explicit message unless
   STRINGLIB_FASTSEARCH_H is already defined, i.e. unless
   "stringlib/fastsearch.h" was included before this file. */
#ifndef STRINGLIB_FASTSEARCH_H
#error must include "stringlib/fastsearch.h" before including this module
#endif


/* Write the quoted, escaped representation of `unicode` into `odata`.

   unicode: string object whose characters are read with PyUnicode_READ().
   quote:   character written before and after the text; occurrences of it
            inside the text are backslash-escaped.
   odata:   destination buffer.  Nothing here checks its capacity, so the
            caller has to provide enough room for the whole output.  The
            last element written is the closing quote; no terminator is
            appended after it.

   Escaping rules, in order of precedence:
     - `quote` and backslash     -> backslash followed by the character
     - TAB, LF, CR               -> '\t', '\n', '\r'
     - other ASCII below space, and DEL (0x7F)
                                 -> '\xhh'
     - remaining ASCII           -> copied unchanged
     - non-ASCII for which Py_UNICODE_ISPRINTABLE() is false
                                 -> '\xhh', '\uxxxx' or '\Uxxxxxxxx',
                                    the shortest form that fits the value
     - every other non-ASCII     -> copied unchanged
*/
static void
STRINGLIB(repr)(PyObject *unicode, Py_UCS4 quote,
                STRINGLIB_CHAR *odata)
{
    const Py_ssize_t length = PyUnicode_GET_LENGTH(unicode);
    const int kind = PyUnicode_KIND(unicode);
    const void *data = PyUnicode_DATA(unicode);
    STRINGLIB_CHAR *out = odata;

    /* Opening quote */
    *out++ = quote;

    for (Py_ssize_t pos = 0; pos < length; pos++) {
        const Py_UCS4 ch = PyUnicode_READ(kind, data, pos);
        char mnemonic;
        char marker;
        int printable;
        int shift;

        /* The quote character and the backslash are prefixed with a
           backslash.  This test comes first so that it wins over every
           other rule below. */
        if (ch == quote || ch == '\\') {
            *out++ = '\\';
            *out++ = ch;
            continue;
        }

        /* Whitespace with a one-letter escape: '\t', '\n', '\r' */
        switch (ch) {
        case '\t':
            mnemonic = 't';
            break;
        case '\n':
            mnemonic = 'n';
            break;
        case '\r':
            mnemonic = 'r';
            break;
        default:
            mnemonic = 0;
            break;
        }
        if (mnemonic) {
            *out++ = '\\';
            *out++ = mnemonic;
            continue;
        }

        /* ASCII is printable from space up to, but not including, DEL;
           anything above ASCII is classified by Py_UNICODE_ISPRINTABLE(). */
        if (ch <= 0x7F) {
            printable = (ch >= ' ' && ch != 0x7F);
        }
        else {
            printable = Py_UNICODE_ISPRINTABLE(ch);
        }
        if (printable) {
            *out++ = ch;
            continue;
        }

        /* Hexadecimal escape.  Choose the shortest form able to hold the
           code point; `shift` starts at the most significant nibble:
           '\xhh' (2 digits), '\uxxxx' (4 digits), '\Uxxxxxxxx' (8 digits). */
        if (ch <= 0xff) {
            marker = 'x';
            shift = 4;
        }
        else if (ch <= 0xffff) {
            marker = 'u';
            shift = 12;
        }
        else {
            marker = 'U';
            shift = 28;
        }
        *out++ = '\\';
        *out++ = marker;
        for (; shift >= 0; shift -= 4) {
            *out++ = Py_hexdigits[(ch >> shift) & 0xF];
        }
    }

    /* Closing quote; the output pointer is not advanced past it. */
    *out = quote;
}