aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/Lib/_strptime.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/_strptime.py')
-rw-r--r--Lib/_strptime.py202
1 files changed, 159 insertions, 43 deletions
diff --git a/Lib/_strptime.py b/Lib/_strptime.py
index ae67949626d..cdc55e8daaf 100644
--- a/Lib/_strptime.py
+++ b/Lib/_strptime.py
@@ -14,6 +14,7 @@ import os
import time
import locale
import calendar
+import re
from re import compile as re_compile
from re import sub as re_sub
from re import IGNORECASE
@@ -41,6 +42,29 @@ def _findall(haystack, needle):
yield i
i += len(needle)
+def _fixmonths(months):
+ yield from months
+ # The lower case of 'İ' ('\u0130') is 'i\u0307'.
+ # The re module only supports 1-to-1 character matching in
+ # case-insensitive mode.
+ for s in months:
+ if 'i\u0307' in s:
+ yield s.replace('i\u0307', '\u0130')
+
+lzh_TW_alt_digits = (
+ # 〇:一:二:三:四:五:六:七:八:九
+ '\u3007', '\u4e00', '\u4e8c', '\u4e09', '\u56db',
+ '\u4e94', '\u516d', '\u4e03', '\u516b', '\u4e5d',
+ # 十:十一:十二:十三:十四:十五:十六:十七:十八:十九
+ '\u5341', '\u5341\u4e00', '\u5341\u4e8c', '\u5341\u4e09', '\u5341\u56db',
+ '\u5341\u4e94', '\u5341\u516d', '\u5341\u4e03', '\u5341\u516b', '\u5341\u4e5d',
+ # 廿:廿一:廿二:廿三:廿四:廿五:廿六:廿七:廿八:廿九
+ '\u5eff', '\u5eff\u4e00', '\u5eff\u4e8c', '\u5eff\u4e09', '\u5eff\u56db',
+ '\u5eff\u4e94', '\u5eff\u516d', '\u5eff\u4e03', '\u5eff\u516b', '\u5eff\u4e5d',
+ # 卅:卅一
+ '\u5345', '\u5345\u4e00')
+
+
class LocaleTime(object):
"""Stores and handles locale-specific information related to time.
@@ -84,6 +108,7 @@ class LocaleTime(object):
self.__calc_weekday()
self.__calc_month()
self.__calc_am_pm()
+ self.__calc_alt_digits()
self.__calc_timezone()
self.__calc_date_time()
if _getlang() != self.lang:
@@ -119,9 +144,43 @@ class LocaleTime(object):
am_pm.append(time.strftime("%p", time_tuple).lower().strip())
self.am_pm = am_pm
+ def __calc_alt_digits(self):
+ # Set self.LC_alt_digits by using time.strftime().
+
+ # The magic data should contain all decimal digits.
+ time_tuple = time.struct_time((1998, 1, 27, 10, 43, 56, 1, 27, 0))
+ s = time.strftime("%x%X", time_tuple)
+ if s.isascii():
+ # Fast path -- all digits are ASCII.
+ self.LC_alt_digits = ()
+ return
+
+ digits = ''.join(sorted(set(re.findall(r'\d', s))))
+ if len(digits) == 10 and ord(digits[-1]) == ord(digits[0]) + 9:
+ # All 10 decimal digits from the same set.
+ if digits.isascii():
+ # All digits are ASCII.
+ self.LC_alt_digits = ()
+ return
+
+ self.LC_alt_digits = [a + b for a in digits for b in digits]
+ # Test whether the numbers contain leading zero.
+ time_tuple2 = time.struct_time((2000, 1, 1, 1, 1, 1, 5, 1, 0))
+ if self.LC_alt_digits[1] not in time.strftime("%x %X", time_tuple2):
+ self.LC_alt_digits[:10] = digits
+ return
+
+ # Either non-Gregorian calendar or non-decimal numbers.
+ if {'\u4e00', '\u4e03', '\u4e5d', '\u5341', '\u5eff'}.issubset(s):
+ # lzh_TW
+ self.LC_alt_digits = lzh_TW_alt_digits
+ return
+
+ self.LC_alt_digits = None
+
def __calc_date_time(self):
- # Set self.date_time, self.date, & self.time by using
- # time.strftime().
+ # Set self.LC_date_time, self.LC_date, self.LC_time and
+ # self.LC_time_ampm by using time.strftime().
# Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of
# overloaded numbers is minimized. The order in which searches for
@@ -129,26 +188,32 @@ class LocaleTime(object):
# possible ambiguity for what something represents.
time_tuple = time.struct_time((1999,3,17,22,44,55,2,76,0))
time_tuple2 = time.struct_time((1999,1,3,1,1,1,6,3,0))
- replacement_pairs = [
+ replacement_pairs = []
+
+ # Non-ASCII digits
+ if self.LC_alt_digits or self.LC_alt_digits is None:
+ for n, d in [(19, '%OC'), (99, '%Oy'), (22, '%OH'),
+ (44, '%OM'), (55, '%OS'), (17, '%Od'),
+ (3, '%Om'), (2, '%Ow'), (10, '%OI')]:
+ if self.LC_alt_digits is None:
+ s = chr(0x660 + n // 10) + chr(0x660 + n % 10)
+ replacement_pairs.append((s, d))
+ if n < 10:
+ replacement_pairs.append((s[1], d))
+ elif len(self.LC_alt_digits) > n:
+ replacement_pairs.append((self.LC_alt_digits[n], d))
+ else:
+ replacement_pairs.append((time.strftime(d, time_tuple), d))
+ replacement_pairs += [
('1999', '%Y'), ('99', '%y'), ('22', '%H'),
('44', '%M'), ('55', '%S'), ('76', '%j'),
('17', '%d'), ('03', '%m'), ('3', '%m'),
# '3' needed for when no leading zero.
('2', '%w'), ('10', '%I'),
- # Non-ASCII digits
- ('\u0661\u0669\u0669\u0669', '%Y'),
- ('\u0669\u0669', '%Oy'),
- ('\u0662\u0662', '%OH'),
- ('\u0664\u0664', '%OM'),
- ('\u0665\u0665', '%OS'),
- ('\u0661\u0667', '%Od'),
- ('\u0660\u0663', '%Om'),
- ('\u0663', '%Om'),
- ('\u0662', '%Ow'),
- ('\u0661\u0660', '%OI'),
]
+
date_time = []
- for directive in ('%c', '%x', '%X'):
+ for directive in ('%c', '%x', '%X', '%r'):
current_format = time.strftime(directive, time_tuple).lower()
current_format = current_format.replace('%', '%%')
# The month and the day of the week formats are treated specially
@@ -172,9 +237,10 @@ class LocaleTime(object):
if tz:
current_format = current_format.replace(tz, "%Z")
# Transform all non-ASCII digits to digits in range U+0660 to U+0669.
- current_format = re_sub(r'\d(?<![0-9])',
- lambda m: chr(0x0660 + int(m[0])),
- current_format)
+ if not current_format.isascii() and self.LC_alt_digits is None:
+ current_format = re_sub(r'\d(?<![0-9])',
+ lambda m: chr(0x0660 + int(m[0])),
+ current_format)
for old, new in replacement_pairs:
current_format = current_format.replace(old, new)
# If %W is used, then Sunday, 2005-01-03 will fall on week 0 since
@@ -189,6 +255,7 @@ class LocaleTime(object):
self.LC_date_time = date_time[0]
self.LC_date = date_time[1]
self.LC_time = date_time[2]
+ self.LC_time_ampm = date_time[3]
def __find_month_format(self, directive):
"""Find the month format appropriate for the current locale.
@@ -213,7 +280,7 @@ class LocaleTime(object):
full_indices &= indices
indices = set(_findall(datetime, self.a_month[m]))
if abbr_indices is None:
- abbr_indices = indices
+ abbr_indices = set(indices)
else:
abbr_indices &= indices
if not full_indices and not abbr_indices:
@@ -241,7 +308,7 @@ class LocaleTime(object):
if self.f_weekday[wd] != self.a_weekday[wd]:
indices = set(_findall(datetime, self.a_weekday[wd]))
if abbr_indices is None:
- abbr_indices = indices
+ abbr_indices = set(indices)
else:
abbr_indices &= indices
if not full_indices and not abbr_indices:
@@ -288,8 +355,10 @@ class TimeRE(dict):
# The " [1-9]" part of the regex is to make %c from ANSI C work
'd': r"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])",
'f': r"(?P<f>[0-9]{1,6})",
- 'H': r"(?P<H>2[0-3]|[0-1]\d|\d)",
+ 'H': r"(?P<H>2[0-3]|[0-1]\d|\d| \d)",
+ 'k': r"(?P<H>2[0-3]|[0-1]\d|\d| \d)",
'I': r"(?P<I>1[0-2]|0[1-9]|[1-9]| [1-9])",
+ 'l': r"(?P<I>1[0-2]|0[1-9]|[1-9]| [1-9])",
'G': r"(?P<G>\d\d\d\d)",
'j': r"(?P<j>36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])",
'm': r"(?P<m>1[0-2]|0[1-9]|[1-9])",
@@ -305,23 +374,56 @@ class TimeRE(dict):
'z': r"(?P<z>([+-]\d\d:?[0-5]\d(:?[0-5]\d(\.\d{1,6})?)?)|(?-i:Z))?",
'A': self.__seqToRE(self.locale_time.f_weekday, 'A'),
'a': self.__seqToRE(self.locale_time.a_weekday, 'a'),
- 'B': self.__seqToRE(self.locale_time.f_month[1:], 'B'),
- 'b': self.__seqToRE(self.locale_time.a_month[1:], 'b'),
+ 'B': self.__seqToRE(_fixmonths(self.locale_time.f_month[1:]), 'B'),
+ 'b': self.__seqToRE(_fixmonths(self.locale_time.a_month[1:]), 'b'),
'p': self.__seqToRE(self.locale_time.am_pm, 'p'),
'Z': self.__seqToRE((tz for tz_names in self.locale_time.timezone
for tz in tz_names),
'Z'),
'%': '%'}
- for d in 'dmyHIMS':
- mapping['O' + d] = r'(?P<%s>\d\d|\d| \d)' % d
- mapping['Ow'] = r'(?P<w>\d)'
+ if self.locale_time.LC_alt_digits is None:
+ for d in 'dmyCHIMS':
+ mapping['O' + d] = r'(?P<%s>\d\d|\d| \d)' % d
+ mapping['Ow'] = r'(?P<w>\d)'
+ else:
+ mapping.update({
+ 'Od': self.__seqToRE(self.locale_time.LC_alt_digits[1:32], 'd',
+ '3[0-1]|[1-2][0-9]|0[1-9]|[1-9]'),
+ 'Om': self.__seqToRE(self.locale_time.LC_alt_digits[1:13], 'm',
+ '1[0-2]|0[1-9]|[1-9]'),
+ 'Ow': self.__seqToRE(self.locale_time.LC_alt_digits[:7], 'w',
+ '[0-6]'),
+ 'Oy': self.__seqToRE(self.locale_time.LC_alt_digits, 'y',
+ '[0-9][0-9]'),
+ 'OC': self.__seqToRE(self.locale_time.LC_alt_digits, 'C',
+ '[0-9][0-9]'),
+ 'OH': self.__seqToRE(self.locale_time.LC_alt_digits[:24], 'H',
+ '2[0-3]|[0-1][0-9]|[0-9]'),
+ 'OI': self.__seqToRE(self.locale_time.LC_alt_digits[1:13], 'I',
+ '1[0-2]|0[1-9]|[1-9]'),
+ 'OM': self.__seqToRE(self.locale_time.LC_alt_digits[:60], 'M',
+ '[0-5][0-9]|[0-9]'),
+ 'OS': self.__seqToRE(self.locale_time.LC_alt_digits[:62], 'S',
+ '6[0-1]|[0-5][0-9]|[0-9]'),
+ })
+ mapping.update({
+ 'e': mapping['d'],
+ 'Oe': mapping['Od'],
+ 'P': mapping['p'],
+ 'Op': mapping['p'],
+ 'W': mapping['U'].replace('U', 'W'),
+ })
mapping['W'] = mapping['U'].replace('U', 'W')
+
base.__init__(mapping)
+ base.__setitem__('T', self.pattern('%H:%M:%S'))
+ base.__setitem__('R', self.pattern('%H:%M'))
+ base.__setitem__('r', self.pattern(self.locale_time.LC_time_ampm))
base.__setitem__('X', self.pattern(self.locale_time.LC_time))
base.__setitem__('x', self.pattern(self.locale_time.LC_date))
base.__setitem__('c', self.pattern(self.locale_time.LC_date_time))
- def __seqToRE(self, to_convert, directive):
+ def __seqToRE(self, to_convert, directive, altregex=None):
"""Convert a list to a regex string for matching a directive.
Want possible matching values to be from longest to shortest. This
@@ -337,8 +439,9 @@ class TimeRE(dict):
else:
return ''
regex = '|'.join(re_escape(stuff) for stuff in to_convert)
- regex = '(?P<%s>%s' % (directive, regex)
- return '%s)' % regex
+ if altregex is not None:
+ regex += '|' + altregex
+ return '(?P<%s>%s)' % (directive, regex)
def pattern(self, format):
"""Return regex pattern for the format string.
@@ -365,7 +468,7 @@ class TimeRE(dict):
nonlocal day_of_month_in_format
day_of_month_in_format = True
return self[format_char]
- format = re_sub(r'%([OE]?\\?.?)', repl, format)
+ format = re_sub(r'%[-_0^#]*[0-9]*([OE]?\\?.?)', repl, format)
if day_of_month_in_format and not year_in_format:
import warnings
warnings.warn("""\
@@ -467,6 +570,15 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
# values
weekday = julian = None
found_dict = found.groupdict()
+ if locale_time.LC_alt_digits:
+ def parse_int(s):
+ try:
+ return locale_time.LC_alt_digits.index(s)
+ except ValueError:
+ return int(s)
+ else:
+ parse_int = int
+
for group_key in found_dict.keys():
# Directives not explicitly handled below:
# c, x, X
@@ -474,30 +586,34 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
# U, W
# worthless without day of the week
if group_key == 'y':
- year = int(found_dict['y'])
- # Open Group specification for strptime() states that a %y
- #value in the range of [00, 68] is in the century 2000, while
- #[69,99] is in the century 1900
- if year <= 68:
- year += 2000
+ year = parse_int(found_dict['y'])
+ if 'C' in found_dict:
+ century = parse_int(found_dict['C'])
+ year += century * 100
else:
- year += 1900
+ # Open Group specification for strptime() states that a %y
+ #value in the range of [00, 68] is in the century 2000, while
+ #[69,99] is in the century 1900
+ if year <= 68:
+ year += 2000
+ else:
+ year += 1900
elif group_key == 'Y':
year = int(found_dict['Y'])
elif group_key == 'G':
iso_year = int(found_dict['G'])
elif group_key == 'm':
- month = int(found_dict['m'])
+ month = parse_int(found_dict['m'])
elif group_key == 'B':
month = locale_time.f_month.index(found_dict['B'].lower())
elif group_key == 'b':
month = locale_time.a_month.index(found_dict['b'].lower())
elif group_key == 'd':
- day = int(found_dict['d'])
+ day = parse_int(found_dict['d'])
elif group_key == 'H':
- hour = int(found_dict['H'])
+ hour = parse_int(found_dict['H'])
elif group_key == 'I':
- hour = int(found_dict['I'])
+ hour = parse_int(found_dict['I'])
ampm = found_dict.get('p', '').lower()
# If there was no AM/PM indicator, we'll treat this like AM
if ampm in ('', locale_time.am_pm[0]):
@@ -513,9 +629,9 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
if hour != 12:
hour += 12
elif group_key == 'M':
- minute = int(found_dict['M'])
+ minute = parse_int(found_dict['M'])
elif group_key == 'S':
- second = int(found_dict['S'])
+ second = parse_int(found_dict['S'])
elif group_key == 'f':
s = found_dict['f']
# Pad to always return microseconds.