about | summary | refs | log | tree | commit | diff | stats | homepage
path: root/Lib/test/test_c_locale_coercion.py
diff options
context:
space:
mode:
author Nick Coghlan <ncoghlan@gmail.com> 2017-06-18 12:29:42 +1000
committer GitHub <noreply@github.com> 2017-06-18 12:29:42 +1000
commit eb81795d7d3a8c898fa89a376d63fc3bbfb9a081 (patch)
tree d991860d1d359c87dc4b609c421ad746fbc06d21 /Lib/test/test_c_locale_coercion.py
parent 6a98a04e21fc3209b2170c3930ce4488e89680e3 (diff)
download cpython-eb81795d7d3a8c898fa89a376d63fc3bbfb9a081.tar.gz
cpython-eb81795d7d3a8c898fa89a376d63fc3bbfb9a081.zip
bpo-30565: Add PYTHONCOERCECLOCALE=warn runtime flag (GH-2260)
- removes PY_WARN_ON_C_LOCALE build time flag
- locale coercion and compatibility warnings are now always compiled in, but are off by default
- adds PYTHONCOERCECLOCALE=warn runtime option to aid in debugging potentially locale-related compatibility problems

Due to not-yet-resolved test failures on *BSD systems (including Mac OS X), this also temporarily disables UTF-8 as a locale coercion target, and skips testing the interpreter's behavior in the POSIX locale.
Diffstat (limited to 'Lib/test/test_c_locale_coercion.py')
-rw-r--r-- Lib/test/test_c_locale_coercion.py 208
1 files changed, 130 insertions, 78 deletions
diff --git a/Lib/test/test_c_locale_coercion.py b/Lib/test/test_c_locale_coercion.py
index aa0771c798d..a4b4626756a 100644
--- a/Lib/test/test_c_locale_coercion.py
+++ b/Lib/test/test_c_locale_coercion.py
@@ -22,13 +22,23 @@ if sys.platform == "darwin":
else:
C_LOCALE_FS_ENCODING = C_LOCALE_STREAM_ENCODING
-# XXX (ncoghlan): The above is probably still wrong for:
+# Note that the above is probably still wrong in some cases, such as:
# * Windows when PYTHONLEGACYWINDOWSFSENCODING is set
# * AIX and any other platforms that use latin-1 in the C locale
+#
+# Options for dealing with this:
+# * Don't set PYTHON_COERCE_C_LOCALE on such platforms (e.g. Windows doesn't)
+# * Fix the test expectations to match the actual platform behaviour
# In order to get the warning messages to match up as expected, the candidate
# order here must match the target locale order in Python/pylifecycle.c
-_C_UTF8_LOCALES = ("C.UTF-8", "C.utf8", "UTF-8")
+_C_UTF8_LOCALES = ("C.UTF-8", "C.utf8") #, "UTF-8")
+
+# XXX (ncoghlan): Using UTF-8 as a target locale is currently disabled due to
+# problems encountered on *BSD systems with those test cases
+# For additional details see:
+# nl_langinfo CODESET error: https://bugs.python.org/issue30647
+# locale handling differences: https://bugs.python.org/issue30672
# There's no reliable cross-platform way of checking locale alias
# lists, so the only way of knowing which of these locales will work
@@ -40,20 +50,24 @@ def _set_locale_in_subprocess(locale_name):
result, py_cmd = run_python_until_end("-c", cmd, __isolated=True)
return result.rc == 0
-_EncodingDetails = namedtuple("EncodingDetails",
- "fsencoding stdin_info stdout_info stderr_info")
+_fields = "fsencoding stdin_info stdout_info stderr_info lang lc_ctype lc_all"
+_EncodingDetails = namedtuple("EncodingDetails", _fields)
class EncodingDetails(_EncodingDetails):
+ # XXX (ncoghlan): Using JSON for child state reporting may be less fragile
CHILD_PROCESS_SCRIPT = ";".join([
- "import sys",
+ "import sys, os",
"print(sys.getfilesystemencoding())",
"print(sys.stdin.encoding + ':' + sys.stdin.errors)",
"print(sys.stdout.encoding + ':' + sys.stdout.errors)",
"print(sys.stderr.encoding + ':' + sys.stderr.errors)",
+ "print(os.environ.get('LANG', 'not set'))",
+ "print(os.environ.get('LC_CTYPE', 'not set'))",
+ "print(os.environ.get('LC_ALL', 'not set'))",
])
@classmethod
- def get_expected_details(cls, fs_encoding, stream_encoding):
+ def get_expected_details(cls, coercion_expected, fs_encoding, stream_encoding, env_vars):
"""Returns expected child process details for a given encoding"""
_stream = stream_encoding + ":{}"
# stdin and stdout should use surrogateescape either because the
@@ -61,7 +75,14 @@ class EncodingDetails(_EncodingDetails):
stream_info = 2*[_stream.format("surrogateescape")]
# stderr should always use backslashreplace
stream_info.append(_stream.format("backslashreplace"))
- return dict(cls(fs_encoding, *stream_info)._asdict())
+ expected_lang = env_vars.get("LANG", "not set").lower()
+ if coercion_expected:
+ expected_lc_ctype = CLI_COERCION_TARGET.lower()
+ else:
+ expected_lc_ctype = env_vars.get("LC_CTYPE", "not set").lower()
+ expected_lc_all = env_vars.get("LC_ALL", "not set").lower()
+ env_info = expected_lang, expected_lc_ctype, expected_lc_all
+ return dict(cls(fs_encoding, *stream_info, *env_info)._asdict())
@staticmethod
def _handle_output_variations(data):
@@ -97,64 +118,20 @@ class EncodingDetails(_EncodingDetails):
result.fail(py_cmd)
# All subprocess outputs in this test case should be pure ASCII
adjusted_output = cls._handle_output_variations(result.out)
- stdout_lines = adjusted_output.decode("ascii").rstrip().splitlines()
+ stdout_lines = adjusted_output.decode("ascii").splitlines()
child_encoding_details = dict(cls(*stdout_lines)._asdict())
stderr_lines = result.err.decode("ascii").rstrip().splitlines()
return child_encoding_details, stderr_lines
-class _ChildProcessEncodingTestCase(unittest.TestCase):
- # Base class to check for expected encoding details in a child process
-
- def _check_child_encoding_details(self,
- env_vars,
- expected_fs_encoding,
- expected_stream_encoding,
- expected_warning):
- """Check the C locale handling for the given process environment
-
- Parameters:
- expected_fs_encoding: expected sys.getfilesystemencoding() result
- expected_stream_encoding: expected encoding for standard streams
- expected_warning: stderr output to expect (if any)
- """
- result = EncodingDetails.get_child_details(env_vars)
- encoding_details, stderr_lines = result
- self.assertEqual(encoding_details,
- EncodingDetails.get_expected_details(
- expected_fs_encoding,
- expected_stream_encoding))
- self.assertEqual(stderr_lines, expected_warning)
-
# Details of the shared library warning emitted at runtime
-LIBRARY_C_LOCALE_WARNING = (
+LEGACY_LOCALE_WARNING = (
"Python runtime initialized with LC_CTYPE=C (a locale with default ASCII "
"encoding), which may cause Unicode compatibility problems. Using C.UTF-8, "
"C.utf8, or UTF-8 (if available) as alternative Unicode-compatible "
"locales is recommended."
)
-@unittest.skipUnless(sysconfig.get_config_var("PY_WARN_ON_C_LOCALE"),
- "C locale runtime warning disabled at build time")
-class LocaleWarningTests(_ChildProcessEncodingTestCase):
- # Test warning emitted when running in the C locale
-
- def test_library_c_locale_warning(self):
- self.maxDiff = None
- for locale_to_set in ("C", "POSIX", "invalid.ascii"):
- # XXX (ncoghlan): Mac OS X doesn't behave as expected in the
- # POSIX locale, so we skip that for now
- if sys.platform == "darwin" and locale_to_set == "POSIX":
- continue
- var_dict = {
- "LC_ALL": locale_to_set
- }
- with self.subTest(forced_locale=locale_to_set):
- self._check_child_encoding_details(var_dict,
- C_LOCALE_FS_ENCODING,
- C_LOCALE_STREAM_ENCODING,
- [LIBRARY_C_LOCALE_WARNING])
-
# Details of the CLI locale coercion warning emitted at runtime
CLI_COERCION_WARNING_FMT = (
"Python detected LC_CTYPE=C: LC_CTYPE coerced to {} (set another locale "
@@ -163,9 +140,13 @@ CLI_COERCION_WARNING_FMT = (
AVAILABLE_TARGETS = None
+CLI_COERCION_TARGET = None
+CLI_COERCION_WARNING = None
def setUpModule():
global AVAILABLE_TARGETS
+ global CLI_COERCION_TARGET
+ global CLI_COERCION_WARNING
if AVAILABLE_TARGETS is not None:
# initialization already done
@@ -177,26 +158,57 @@ def setUpModule():
if _set_locale_in_subprocess(target_locale):
AVAILABLE_TARGETS.append(target_locale)
+ if AVAILABLE_TARGETS:
+ # Coercion is expected to use the first available target locale
+ CLI_COERCION_TARGET = AVAILABLE_TARGETS[0]
+ CLI_COERCION_WARNING = CLI_COERCION_WARNING_FMT.format(CLI_COERCION_TARGET)
-class _LocaleCoercionTargetsTestCase(_ChildProcessEncodingTestCase):
- # Base class for test cases that rely on coercion targets being defined
+class _LocaleHandlingTestCase(unittest.TestCase):
+ # Base class to check expected locale handling behaviour
- @classmethod
- def setUpClass(cls):
- if not AVAILABLE_TARGETS:
- raise unittest.SkipTest("No C-with-UTF-8 locale available")
+ def _check_child_encoding_details(self,
+ env_vars,
+ expected_fs_encoding,
+ expected_stream_encoding,
+ expected_warnings,
+ coercion_expected):
+ """Check the C locale handling for the given process environment
+ Parameters:
+ expected_fs_encoding: expected sys.getfilesystemencoding() result
+ expected_stream_encoding: expected encoding for standard streams
+ expected_warning: stderr output to expect (if any)
+ """
+ result = EncodingDetails.get_child_details(env_vars)
+ encoding_details, stderr_lines = result
+ expected_details = EncodingDetails.get_expected_details(
+ coercion_expected,
+ expected_fs_encoding,
+ expected_stream_encoding,
+ env_vars
+ )
+ self.assertEqual(encoding_details, expected_details)
+ if expected_warnings is None:
+ expected_warnings = []
+ self.assertEqual(stderr_lines, expected_warnings)
-class LocaleConfigurationTests(_LocaleCoercionTargetsTestCase):
+
+class LocaleConfigurationTests(_LocaleHandlingTestCase):
# Test explicit external configuration via the process environment
+ def setUpClass():
+ # This relies on setUpModule() having been run, so it can't be
+ # handled via the @unittest.skipUnless decorator
+ if not AVAILABLE_TARGETS:
+ raise unittest.SkipTest("No C-with-UTF-8 locale available")
+
def test_external_target_locale_configuration(self):
+
# Explicitly setting a target locale should give the same behaviour as
# is seen when implicitly coercing to that target locale
self.maxDiff = None
- expected_warning = []
expected_fs_encoding = "utf-8"
expected_stream_encoding = "utf-8"
@@ -209,6 +221,7 @@ class LocaleConfigurationTests(_LocaleCoercionTargetsTestCase):
for locale_to_set in AVAILABLE_TARGETS:
# XXX (ncoghlan): LANG=UTF-8 doesn't appear to work as
# expected, so skip that combination for now
+ # See https://bugs.python.org/issue30672 for discussion
if env_var == "LANG" and locale_to_set == "UTF-8":
continue
@@ -219,17 +232,23 @@ class LocaleConfigurationTests(_LocaleCoercionTargetsTestCase):
self._check_child_encoding_details(var_dict,
expected_fs_encoding,
expected_stream_encoding,
- expected_warning)
+ expected_warnings=None,
+ coercion_expected=False)
@test.support.cpython_only
@unittest.skipUnless(sysconfig.get_config_var("PY_COERCE_C_LOCALE"),
"C locale coercion disabled at build time")
-class LocaleCoercionTests(_LocaleCoercionTargetsTestCase):
+class LocaleCoercionTests(_LocaleHandlingTestCase):
# Test implicit reconfiguration of the environment during CLI startup
- def _check_c_locale_coercion(self, fs_encoding, stream_encoding, coerce_c_locale):
+ def _check_c_locale_coercion(self,
+ fs_encoding, stream_encoding,
+ coerce_c_locale,
+ expected_warnings=None,
+ coercion_expected=True,
+ **extra_vars):
"""Check the C locale handling for various configurations
Parameters:
@@ -238,27 +257,31 @@ class LocaleCoercionTests(_LocaleCoercionTargetsTestCase):
coerce_c_locale: setting to use for PYTHONCOERCECLOCALE
None: don't set the variable at all
str: the value set in the child's environment
+ expected_warnings: expected warning lines on stderr
+ extra_vars: additional environment variables to set in subprocess
"""
-
- # Check for expected warning on stderr if C locale is coerced
self.maxDiff = None
- expected_warning = []
- if coerce_c_locale != "0":
- # Expect coercion to use the first available locale
- warning_msg = CLI_COERCION_WARNING_FMT.format(AVAILABLE_TARGETS[0])
- expected_warning.append(warning_msg)
+ if not AVAILABLE_TARGETS:
+ # Locale coercion is disabled when there aren't any target locales
+ fs_encoding = C_LOCALE_FS_ENCODING
+ stream_encoding = C_LOCALE_STREAM_ENCODING
+ coercion_expected = False
+ if expected_warnings:
+ expected_warnings = [LEGACY_LOCALE_WARNING]
base_var_dict = {
"LANG": "",
"LC_CTYPE": "",
"LC_ALL": "",
}
+ base_var_dict.update(extra_vars)
for env_var in ("LANG", "LC_CTYPE"):
for locale_to_set in ("", "C", "POSIX", "invalid.ascii"):
- # XXX (ncoghlan): Mac OS X doesn't behave as expected in the
+ # XXX (ncoghlan): *BSD platforms don't behave as expected in the
# POSIX locale, so we skip that for now
- if sys.platform == "darwin" and locale_to_set == "POSIX":
+ # See https://bugs.python.org/issue30672 for discussion
+ if locale_to_set == "POSIX":
continue
with self.subTest(env_var=env_var,
nominal_locale=locale_to_set,
@@ -267,33 +290,62 @@ class LocaleCoercionTests(_LocaleCoercionTargetsTestCase):
var_dict[env_var] = locale_to_set
if coerce_c_locale is not None:
var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale
+ # Check behaviour on successful coercion
self._check_child_encoding_details(var_dict,
fs_encoding,
stream_encoding,
- expected_warning)
+ expected_warnings,
+ coercion_expected)
def test_test_PYTHONCOERCECLOCALE_not_set(self):
# This should coerce to the first available target locale by default
self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=None)
def test_PYTHONCOERCECLOCALE_not_zero(self):
- # *Any* string other that "0" is considered "set" for our purposes
+ # *Any* string other than "0" is considered "set" for our purposes
# and hence should result in the locale coercion being enabled
for setting in ("", "1", "true", "false"):
self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=setting)
+ def test_PYTHONCOERCECLOCALE_set_to_warn(self):
+ # PYTHONCOERCECLOCALE=warn enables runtime warnings for legacy locales
+ self._check_c_locale_coercion("utf-8", "utf-8",
+ coerce_c_locale="warn",
+ expected_warnings=[CLI_COERCION_WARNING])
+
+
def test_PYTHONCOERCECLOCALE_set_to_zero(self):
# The setting "0" should result in the locale coercion being disabled
self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,
C_LOCALE_STREAM_ENCODING,
- coerce_c_locale="0")
+ coerce_c_locale="0",
+ coercion_expected=False)
+ # Setting LC_ALL=C shouldn't make any difference to the behaviour
+ self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,
+ C_LOCALE_STREAM_ENCODING,
+ coerce_c_locale="0",
+ LC_ALL="C",
+ coercion_expected=False)
+ def test_LC_ALL_set_to_C(self):
+ # Setting LC_ALL should render the locale coercion ineffective
+ self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,
+ C_LOCALE_STREAM_ENCODING,
+ coerce_c_locale=None,
+ LC_ALL="C",
+ coercion_expected=False)
+ # And result in a warning about a lack of locale compatibility
+ self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,
+ C_LOCALE_STREAM_ENCODING,
+ coerce_c_locale="warn",
+ LC_ALL="C",
+ expected_warnings=[LEGACY_LOCALE_WARNING],
+ coercion_expected=False)
def test_main():
test.support.run_unittest(
LocaleConfigurationTests,
- LocaleCoercionTests,
- LocaleWarningTests
+ LocaleCoercionTests
)
test.support.reap_children()