diff options
Diffstat (limited to 'Python/preconfig.c')
-rw-r--r-- | Python/preconfig.c | 400 |
1 files changed, 395 insertions, 5 deletions
diff --git a/Python/preconfig.c b/Python/preconfig.c index af70f3871cc..3befecfaa03 100644 --- a/Python/preconfig.c +++ b/Python/preconfig.c @@ -1,6 +1,8 @@ #include "Python.h" #include "pycore_coreconfig.h" #include "pycore_getopt.h" +#include "pycore_pystate.h" /* _PyRuntime_Initialize() */ +#include <locale.h> /* setlocale() */ #define DECODE_LOCALE_ERR(NAME, LEN) \ @@ -99,6 +101,8 @@ typedef struct { const _PyArgv *args; int argc; wchar_t **argv; + int nxoption; /* Number of -X options */ + wchar_t **xoptions; /* -X options */ } _PyPreCmdline; @@ -109,6 +113,10 @@ precmdline_clear(_PyPreCmdline *cmdline) _Py_wstrlist_clear(cmdline->args->argc, cmdline->argv); } cmdline->argv = NULL; + + _Py_wstrlist_clear(cmdline->nxoption, cmdline->xoptions); + cmdline->nxoption = 0; + cmdline->xoptions = NULL; } @@ -129,6 +137,12 @@ _PyPreConfig_Copy(_PyPreConfig *config, const _PyPreConfig *config2) COPY_ATTR(isolated); COPY_ATTR(use_environment); + COPY_ATTR(coerce_c_locale); + COPY_ATTR(coerce_c_locale_warn); +#ifdef MS_WINDOWS + COPY_ATTR(legacy_windows_fs_encoding); +#endif + COPY_ATTR(utf8_mode); #undef COPY_ATTR return 0; @@ -149,6 +163,10 @@ _PyPreConfig_GetGlobalConfig(_PyPreConfig *config) COPY_FLAG(isolated, Py_IsolatedFlag); COPY_NOT_FLAG(use_environment, Py_IgnoreEnvironmentFlag); +#ifdef MS_WINDOWS + COPY_FLAG(legacy_windows_fs_encoding, Py_LegacyWindowsFSEncodingFlag); +#endif + COPY_FLAG(utf8_mode, Py_UTF8Mode); #undef COPY_FLAG #undef COPY_NOT_FLAG @@ -169,14 +187,161 @@ _PyPreConfig_SetGlobalConfig(const _PyPreConfig *config) COPY_FLAG(isolated, Py_IsolatedFlag); COPY_NOT_FLAG(use_environment, Py_IgnoreEnvironmentFlag); +#ifdef MS_WINDOWS + COPY_FLAG(legacy_windows_fs_encoding, Py_LegacyWindowsFSEncodingFlag); +#endif + COPY_FLAG(utf8_mode, Py_UTF8Mode); #undef COPY_FLAG #undef COPY_NOT_FLAG } -_PyInitError -_PyPreConfig_Read(_PyPreConfig *config) +const char* +_PyPreConfig_GetEnv(const _PyPreConfig *config, const char *name) +{ + assert(config->use_environment >= 0); + + if (!config->use_environment) { + return NULL; + } + + const char *var = getenv(name); + if (var && var[0] != '\0') { + return var; + } + else { + return NULL; + } +} + + +int +_Py_str_to_int(const char *str, int *result) +{ + const char *endptr = str; + errno = 0; + long value = strtol(str, (char **)&endptr, 10); + if (*endptr != '\0' || errno == ERANGE) { + return -1; + } + if (value < INT_MIN || value > INT_MAX) { + return -1; + } + + *result = (int)value; + return 0; +} + + +void +_Py_get_env_flag(_PyPreConfig *config, int *flag, const char *name) +{ + const char *var = _PyPreConfig_GetEnv(config, name); + if (!var) { + return; + } + int value; + if (_Py_str_to_int(var, &value) < 0 || value < 0) { + /* PYTHONDEBUG=text and PYTHONDEBUG=-2 behave as PYTHONDEBUG=1 */ + value = 1; + } + if (*flag < value) { + *flag = value; + } +} + + +const wchar_t* +_Py_get_xoption(int nxoption, wchar_t * const *xoptions, const wchar_t *name) +{ + for (int i=0; i < nxoption; i++) { + const wchar_t *option = xoptions[i]; + size_t len; + wchar_t *sep = wcschr(option, L'='); + if (sep != NULL) { + len = (sep - option); + } + else { + len = wcslen(option); + } + if (wcsncmp(option, name, len) == 0 && name[len] == L'\0') { + return option; + } + } + return NULL; +} + + +static _PyInitError +preconfig_init_utf8_mode(_PyPreConfig *config, const _PyPreCmdline *cmdline) +{ + const wchar_t *xopt; + if (cmdline) { + xopt = _Py_get_xoption(cmdline->nxoption, cmdline->xoptions, L"utf8"); + } + else { + xopt = NULL; + } + if (xopt) { + wchar_t *sep = wcschr(xopt, L'='); + if (sep) { + xopt = sep + 1; + if (wcscmp(xopt, L"1") == 0) { + config->utf8_mode = 1; + } + else if (wcscmp(xopt, L"0") == 0) { + config->utf8_mode = 0; + } + else { + return _Py_INIT_USER_ERR("invalid -X utf8 option value"); + } + } + else { + config->utf8_mode = 1; + } + return _Py_INIT_OK(); + } + + const char *opt = _PyPreConfig_GetEnv(config, "PYTHONUTF8"); + if (opt) { + if (strcmp(opt, "1") == 0) { + config->utf8_mode = 1; + } + else if (strcmp(opt, "0") == 0) { + config->utf8_mode = 0; + } + else { + return _Py_INIT_USER_ERR("invalid PYTHONUTF8 environment " + "variable value"); + } + return _Py_INIT_OK(); + } + + return _Py_INIT_OK(); +} + + +static void +preconfig_init_locale(_PyPreConfig *config) +{ + /* Test also if coerce_c_locale equals 1: PYTHONCOERCECLOCALE=1 doesn't + imply that the C locale is always coerced. It is only coerced if + if the LC_CTYPE locale is "C". */ + if (config->coerce_c_locale != 0) { + /* The C locale enables the C locale coercion (PEP 538) */ + if (_Py_LegacyLocaleDetected()) { + config->coerce_c_locale = 1; + } + else { + config->coerce_c_locale = 0; + } + } +} + + +static _PyInitError +preconfig_read(_PyPreConfig *config, const _PyPreCmdline *cmdline) { _PyPreConfig_GetGlobalConfig(config); @@ -189,6 +354,69 @@ _PyPreConfig_Read(_PyPreConfig *config) config->use_environment = 0; } + if (config->use_environment) { +#ifdef MS_WINDOWS + _Py_get_env_flag(config, &config->legacy_windows_fs_encoding, + "PYTHONLEGACYWINDOWSFSENCODING"); +#endif + + const char *env = _PyPreConfig_GetEnv(config, "PYTHONCOERCECLOCALE"); + if (env) { + if (strcmp(env, "0") == 0) { + if (config->coerce_c_locale < 0) { + config->coerce_c_locale = 0; + } + } + else if (strcmp(env, "warn") == 0) { + config->coerce_c_locale_warn = 1; + } + else { + if (config->coerce_c_locale < 0) { + config->coerce_c_locale = 1; + } + } + } + } + +#ifdef MS_WINDOWS + if (config->legacy_windows_fs_encoding) { + config->utf8_mode = 0; + } +#endif + + if (config->utf8_mode < 0) { + _PyInitError err = preconfig_init_utf8_mode(config, cmdline); + if (_Py_INIT_FAILED(err)) { + return err; + } + } + + if (config->coerce_c_locale != 0) { + preconfig_init_locale(config); + } + +#ifndef MS_WINDOWS + if (config->utf8_mode < 0) { + /* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */ + const char *ctype_loc = setlocale(LC_CTYPE, NULL); + if (ctype_loc != NULL + && (strcmp(ctype_loc, "C") == 0 + || strcmp(ctype_loc, "POSIX") == 0)) + { + config->utf8_mode = 1; + } + } +#endif + + if (config->coerce_c_locale < 0) { + config->coerce_c_locale = 0; + } + if (config->utf8_mode < 0) { + config->utf8_mode = 0; + } + + assert(config->coerce_c_locale >= 0); + assert(config->utf8_mode >= 0); assert(config->isolated >= 0); assert(config->use_environment >= 0); @@ -196,6 +424,13 @@ _PyPreConfig_Read(_PyPreConfig *config) } +_PyInitError +_PyPreConfig_Read(_PyPreConfig *config) +{ + return preconfig_read(config, NULL); +} + + int _PyPreConfig_AsDict(const _PyPreConfig *config, PyObject *dict) { @@ -216,6 +451,12 @@ _PyPreConfig_AsDict(const _PyPreConfig *config, PyObject *dict) SET_ITEM_INT(isolated); SET_ITEM_INT(use_environment); + SET_ITEM_INT(coerce_c_locale); + SET_ITEM_INT(coerce_c_locale_warn); + SET_ITEM_INT(utf8_mode); +#ifdef MS_WINDOWS + SET_ITEM_INT(legacy_windows_fs_encoding); +#endif return 0; fail: @@ -251,6 +492,18 @@ preconfig_parse_cmdline(_PyPreConfig *config, _PyPreCmdline *cmdline) config->isolated++; break; + case 'X': + { + _PyInitError err; + err = _Py_wstrlist_append(&cmdline->nxoption, + &cmdline->xoptions, + _PyOS_optarg); + if (_Py_INIT_FAILED(err)) { + return err; + } + break; + } + default: /* ignore other argument: handled by _PyCoreConfig_ReadFromArgv() */ @@ -262,8 +515,8 @@ preconfig_parse_cmdline(_PyPreConfig *config, _PyPreCmdline *cmdline) } -_PyInitError -_PyPreConfig_ReadFromArgv(_PyPreConfig *config, const _PyArgv *args) +static _PyInitError +preconfig_from_argv(_PyPreConfig *config, const _PyArgv *args) { _PyInitError err; @@ -281,7 +534,7 @@ _PyPreConfig_ReadFromArgv(_PyPreConfig *config, const _PyArgv *args) goto done; } - err = _PyPreConfig_Read(config); + err = preconfig_read(config, &cmdline); if (_Py_INIT_FAILED(err)) { goto done; } @@ -293,7 +546,144 @@ done: } +/* Read the preconfiguration. */ +_PyInitError +_PyPreConfig_ReadFromArgv(_PyPreConfig *config, const _PyArgv *args) +{ + _PyInitError err; + + err = _PyRuntime_Initialize(); + if (_Py_INIT_FAILED(err)) { + return err; + } + + char *init_ctype_locale = NULL; + int init_utf8_mode = Py_UTF8Mode; +#ifdef MS_WINDOWS + int init_legacy_encoding = Py_LegacyWindowsFSEncodingFlag; +#endif + _PyPreConfig save_config = _PyPreConfig_INIT; + int locale_coerced = 0; + int loops = 0; + + /* copy LC_CTYPE locale */ + const char *loc = setlocale(LC_CTYPE, NULL); + if (loc == NULL) { + err = _Py_INIT_ERR("failed to LC_CTYPE locale"); + goto done; + } + init_ctype_locale = _PyMem_RawStrdup(loc); + if (init_ctype_locale == NULL) { + err = _Py_INIT_NO_MEMORY(); + goto done; + } + + if (_PyPreConfig_Copy(&save_config, config) < 0) { + err = _Py_INIT_NO_MEMORY(); + goto done; + } + + /* Set LC_CTYPE to the user preferred locale */ + _Py_SetLocaleFromEnv(LC_CTYPE); + + while (1) { + int utf8_mode = config->utf8_mode; + + /* Watchdog to prevent an infinite loop */ + loops++; + if (loops == 3) { + err = _Py_INIT_ERR("Encoding changed twice while " + "reading the configuration"); + goto done; + } + + /* bpo-34207: Py_DecodeLocale() and Py_EncodeLocale() depend + on Py_UTF8Mode and Py_LegacyWindowsFSEncodingFlag. */ + Py_UTF8Mode = config->utf8_mode; +#ifdef MS_WINDOWS + Py_LegacyWindowsFSEncodingFlag = config->legacy_windows_fs_encoding; +#endif + + err = preconfig_from_argv(config, args); + if (_Py_INIT_FAILED(err)) { + goto done; + } + + if (locale_coerced) { + config->coerce_c_locale = 1; + } + + /* The legacy C locale assumes ASCII as the default text encoding, which + * causes problems not only for the CPython runtime, but also other + * components like GNU readline. + * + * Accordingly, when the CLI detects it, it attempts to coerce it to a + * more capable UTF-8 based alternative. + * + * See the documentation of the PYTHONCOERCECLOCALE setting for more + * details. + */ + int encoding_changed = 0; + if (config->coerce_c_locale && !locale_coerced) { + locale_coerced = 1; + _Py_CoerceLegacyLocale(0); + encoding_changed = 1; + } + + if (utf8_mode == -1) { + if (config->utf8_mode == 1) { + /* UTF-8 Mode enabled */ + encoding_changed = 1; + } + } + else { + if (config->utf8_mode != utf8_mode) { + encoding_changed = 1; + } + } + + if (!encoding_changed) { + break; + } + + /* Reset the configuration before reading again the configuration, + just keep UTF-8 Mode value. */ + int new_utf8_mode = config->utf8_mode; + int new_coerce_c_locale = config->coerce_c_locale; + if (_PyPreConfig_Copy(config, &save_config) < 0) { + err = _Py_INIT_NO_MEMORY(); + goto done; + } + config->utf8_mode = new_utf8_mode; + config->coerce_c_locale = new_coerce_c_locale; + + /* The encoding changed: read again the configuration + with the new encoding */ + } + err = _Py_INIT_OK(); + +done: + if (init_ctype_locale != NULL) { + setlocale(LC_CTYPE, init_ctype_locale); + } + _PyPreConfig_Clear(&save_config); + Py_UTF8Mode = init_utf8_mode ; +#ifdef MS_WINDOWS + Py_LegacyWindowsFSEncodingFlag = init_legacy_encoding; +#endif + return err; +} + + void _PyPreConfig_Write(const _PyPreConfig *config) { + _PyPreConfig_SetGlobalConfig(config); + + if (config->coerce_c_locale) { + _Py_CoerceLegacyLocale(config->coerce_c_locale_warn); + } + + /* Set LC_CTYPE to the user preferred locale */ + _Py_SetLocaleFromEnv(LC_CTYPE); } |