diff options
Diffstat (limited to 'Lib/string.py')
-rw-r--r-- | Lib/string.py | 423 |
1 files changed, 30 insertions, 393 deletions
diff --git a/Lib/string.py b/Lib/string.py index 97278034a0f..0f4ede23e14 100644 --- a/Lib/string.py +++ b/Lib/string.py @@ -1,43 +1,31 @@ -"""A collection of string operations (most are no longer used). - -Warning: most of the code you see here isn't normally used nowadays. -Beginning with Python 1.6, many of these functions are implemented as -methods on the standard string object. They used to be implemented by -a built-in module called strop, but strop is now obsolete itself. +"""A collection of string constants. Public module variables: -whitespace -- a string containing all characters considered whitespace -lowercase -- a string containing all characters considered lowercase letters -uppercase -- a string containing all characters considered uppercase letters -letters -- a string containing all characters considered letters -digits -- a string containing all characters considered decimal digits -hexdigits -- a string containing all characters considered hexadecimal digits -octdigits -- a string containing all characters considered octal digits -punctuation -- a string containing all characters considered punctuation -printable -- a string containing all characters considered printable +whitespace -- a string containing all ASCII whitespace +ascii_lowercase -- a string containing all ASCII lowercase letters +ascii_uppercase -- a string containing all ASCII uppercase letters +ascii_letters -- a string containing all ASCII letters +digits -- a string containing all ASCII decimal digits +hexdigits -- a string containing all ASCII hexadecimal digits +octdigits -- a string containing all ASCII octal digits +punctuation -- a string containing all ASCII punctuation characters +printable -- a string containing all ASCII characters considered printable """ +import _string + # Some strings for ctype-style character classification whitespace = ' \t\n\r\v\f' -lowercase = 'abcdefghijklmnopqrstuvwxyz' -uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' -letters = lowercase + uppercase -ascii_lowercase = lowercase -ascii_uppercase = uppercase +ascii_lowercase = 'abcdefghijklmnopqrstuvwxyz' +ascii_uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' ascii_letters = ascii_lowercase + ascii_uppercase digits = '0123456789' hexdigits = digits + 'abcdef' + 'ABCDEF' octdigits = '01234567' punctuation = """!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~""" -printable = digits + letters + punctuation + whitespace - -# Case conversion helpers -# Use str to convert Unicode literal in case of -U -l = map(chr, xrange(256)) -_idmap = str('').join(l) -del l +printable = digits + ascii_letters + punctuation + whitespace # Functions which aren't available as string methods. @@ -56,29 +44,6 @@ def capwords(s, sep=None): return (sep or ' ').join(x.capitalize() for x in s.split(sep)) -# Construct a translation string -_idmapL = None -def maketrans(fromstr, tostr): - """maketrans(frm, to) -> string - - Return a translation table (a string of 256 bytes long) - suitable for use in string.translate. The strings frm and to - must be of the same length. - - """ - if len(fromstr) != len(tostr): - raise ValueError, "maketrans arguments must have same length" - global _idmapL - if not _idmapL: - _idmapL = list(_idmap) - L = _idmapL[:] - fromstr = map(ord, fromstr) - for i in range(len(fromstr)): - L[fromstr[i]] = tostr[i] - return ''.join(L) - - - #################################################################### import re as _re @@ -118,15 +83,15 @@ class _TemplateMetaclass(type): 'delim' : _re.escape(cls.delimiter), 'id' : cls.idpattern, } - cls.pattern = _re.compile(pattern, _re.IGNORECASE | _re.VERBOSE) + cls.pattern = _re.compile(pattern, cls.flags | _re.VERBOSE) -class Template: +class Template(metaclass=_TemplateMetaclass): """A string class for supporting $-substitutions.""" - __metaclass__ = _TemplateMetaclass delimiter = '$' idpattern = r'[_a-z][_a-z0-9]*' + flags = _re.IGNORECASE def __init__(self, template): self.template = template @@ -182,365 +147,35 @@ class Template: mapping = args[0] # Helper function for .sub() def convert(mo): - named = mo.group('named') + named = mo.group('named') or mo.group('braced') if named is not None: try: # We use this idiom instead of str() because the latter # will fail if val is a Unicode containing non-ASCII return '%s' % (mapping[named],) except KeyError: - return self.delimiter + named - braced = mo.group('braced') - if braced is not None: - try: - return '%s' % (mapping[braced],) - except KeyError: - return self.delimiter + '{' + braced + '}' + return mo.group() if mo.group('escaped') is not None: return self.delimiter if mo.group('invalid') is not None: - return self.delimiter + return mo.group() raise ValueError('Unrecognized named group in pattern', self.pattern) return self.pattern.sub(convert, self.template) -#################################################################### -# NOTE: Everything below here is deprecated. Use string methods instead. -# This stuff will go away in Python 3.0. - -# Backward compatible names for exceptions -index_error = ValueError -atoi_error = ValueError -atof_error = ValueError -atol_error = ValueError - -# convert UPPER CASE letters to lower case -def lower(s): - """lower(s) -> string - - Return a copy of the string s converted to lowercase. - - """ - return s.lower() - -# Convert lower case letters to UPPER CASE -def upper(s): - """upper(s) -> string - - Return a copy of the string s converted to uppercase. - - """ - return s.upper() - -# Swap lower case letters and UPPER CASE -def swapcase(s): - """swapcase(s) -> string - - Return a copy of the string s with upper case characters - converted to lowercase and vice versa. - - """ - return s.swapcase() - -# Strip leading and trailing tabs and spaces -def strip(s, chars=None): - """strip(s [,chars]) -> string - - Return a copy of the string s with leading and trailing - whitespace removed. - If chars is given and not None, remove characters in chars instead. - If chars is unicode, S will be converted to unicode before stripping. - - """ - return s.strip(chars) - -# Strip leading tabs and spaces -def lstrip(s, chars=None): - """lstrip(s [,chars]) -> string - - Return a copy of the string s with leading whitespace removed. - If chars is given and not None, remove characters in chars instead. - - """ - return s.lstrip(chars) - -# Strip trailing tabs and spaces -def rstrip(s, chars=None): - """rstrip(s [,chars]) -> string - - Return a copy of the string s with trailing whitespace removed. - If chars is given and not None, remove characters in chars instead. - - """ - return s.rstrip(chars) - - -# Split a string into a list of space/tab-separated words -def split(s, sep=None, maxsplit=-1): - """split(s [,sep [,maxsplit]]) -> list of strings - - Return a list of the words in the string s, using sep as the - delimiter string. If maxsplit is given, splits at no more than - maxsplit places (resulting in at most maxsplit+1 words). If sep - is not specified or is None, any whitespace string is a separator. - - (split and splitfields are synonymous) - - """ - return s.split(sep, maxsplit) -splitfields = split - -# Split a string into a list of space/tab-separated words -def rsplit(s, sep=None, maxsplit=-1): - """rsplit(s [,sep [,maxsplit]]) -> list of strings - - Return a list of the words in the string s, using sep as the - delimiter string, starting at the end of the string and working - to the front. If maxsplit is given, at most maxsplit splits are - done. If sep is not specified or is None, any whitespace string - is a separator. - """ - return s.rsplit(sep, maxsplit) - -# Join fields with optional separator -def join(words, sep = ' '): - """join(list [,sep]) -> string - - Return a string composed of the words in list, with - intervening occurrences of sep. The default separator is a - single space. - - (joinfields and join are synonymous) - - """ - return sep.join(words) -joinfields = join - -# Find substring, raise exception if not found -def index(s, *args): - """index(s, sub [,start [,end]]) -> int - - Like find but raises ValueError when the substring is not found. - - """ - return s.index(*args) - -# Find last substring, raise exception if not found -def rindex(s, *args): - """rindex(s, sub [,start [,end]]) -> int - - Like rfind but raises ValueError when the substring is not found. - - """ - return s.rindex(*args) - -# Count non-overlapping occurrences of substring -def count(s, *args): - """count(s, sub[, start[,end]]) -> int - - Return the number of occurrences of substring sub in string - s[start:end]. Optional arguments start and end are - interpreted as in slice notation. - - """ - return s.count(*args) - -# Find substring, return -1 if not found -def find(s, *args): - """find(s, sub [,start [,end]]) -> in - - Return the lowest index in s where substring sub is found, - such that sub is contained within s[start,end]. Optional - arguments start and end are interpreted as in slice notation. - - Return -1 on failure. - - """ - return s.find(*args) - -# Find last substring, return -1 if not found -def rfind(s, *args): - """rfind(s, sub [,start [,end]]) -> int - - Return the highest index in s where substring sub is found, - such that sub is contained within s[start,end]. Optional - arguments start and end are interpreted as in slice notation. - - Return -1 on failure. - - """ - return s.rfind(*args) - -# for a bit of speed -_float = float -_int = int -_long = long - -# Convert string to float -def atof(s): - """atof(s) -> float - - Return the floating point number represented by the string s. - - """ - return _float(s) - - -# Convert string to integer -def atoi(s , base=10): - """atoi(s [,base]) -> int - - Return the integer represented by the string s in the given - base, which defaults to 10. The string s must consist of one - or more digits, possibly preceded by a sign. If base is 0, it - is chosen from the leading characters of s, 0 for octal, 0x or - 0X for hexadecimal. If base is 16, a preceding 0x or 0X is - accepted. - - """ - return _int(s, base) - - -# Convert string to long integer -def atol(s, base=10): - """atol(s [,base]) -> long - - Return the long integer represented by the string s in the - given base, which defaults to 10. The string s must consist - of one or more digits, possibly preceded by a sign. If base - is 0, it is chosen from the leading characters of s, 0 for - octal, 0x or 0X for hexadecimal. If base is 16, a preceding - 0x or 0X is accepted. A trailing L or l is not accepted, - unless base is 0. - - """ - return _long(s, base) - - -# Left-justify a string -def ljust(s, width, *args): - """ljust(s, width[, fillchar]) -> string - - Return a left-justified version of s, in a field of the - specified width, padded with spaces as needed. The string is - never truncated. If specified the fillchar is used instead of spaces. - - """ - return s.ljust(width, *args) - -# Right-justify a string -def rjust(s, width, *args): - """rjust(s, width[, fillchar]) -> string - - Return a right-justified version of s, in a field of the - specified width, padded with spaces as needed. The string is - never truncated. If specified the fillchar is used instead of spaces. - - """ - return s.rjust(width, *args) - -# Center a string -def center(s, width, *args): - """center(s, width[, fillchar]) -> string - - Return a center version of s, in a field of the specified - width. padded with spaces as needed. The string is never - truncated. If specified the fillchar is used instead of spaces. - - """ - return s.center(width, *args) - -# Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03' -# Decadent feature: the argument may be a string or a number -# (Use of this is deprecated; it should be a string as with ljust c.s.) -def zfill(x, width): - """zfill(x, width) -> string - - Pad a numeric string x with zeros on the left, to fill a field - of the specified width. The string x is never truncated. - - """ - if not isinstance(x, basestring): - x = repr(x) - return x.zfill(width) - -# Expand tabs in a string. -# Doesn't take non-printing chars into account, but does understand \n. -def expandtabs(s, tabsize=8): - """expandtabs(s [,tabsize]) -> string - - Return a copy of the string s with all tab characters replaced - by the appropriate number of spaces, depending on the current - column, and the tabsize (default 8). - - """ - return s.expandtabs(tabsize) - -# Character translation through look-up table. -def translate(s, table, deletions=""): - """translate(s,table [,deletions]) -> string - - Return a copy of the string s, where all characters occurring - in the optional argument deletions are removed, and the - remaining characters have been mapped through the given - translation table, which must be a string of length 256. The - deletions argument is not allowed for Unicode strings. - - """ - if deletions or table is None: - return s.translate(table, deletions) - else: - # Add s[:0] so that if s is Unicode and table is an 8-bit string, - # table is converted to Unicode. This means that table *cannot* - # be a dictionary -- for that feature, use u.translate() directly. - return s.translate(table + s[:0]) - -# Capitalize a string, e.g. "aBc dEf" -> "Abc def". -def capitalize(s): - """capitalize(s) -> string - - Return a copy of the string s with only its first character - capitalized. - - """ - return s.capitalize() - -# Substring replacement (global) -def replace(s, old, new, maxreplace=-1): - """replace (str, old, new[, maxreplace]) -> string - - Return a copy of string str with all occurrences of substring - old replaced by new. If the optional argument maxreplace is - given, only the first maxreplace occurrences are replaced. - - """ - return s.replace(old, new, maxreplace) - - -# Try importing optional built-in module "strop" -- if it exists, -# it redefines some string operations that are 100-1000 times faster. -# It also defines values for whitespace, lowercase and uppercase -# that match <ctype.h>'s definitions. - -try: - from strop import maketrans, lowercase, uppercase, whitespace - letters = lowercase + uppercase -except ImportError: - pass # Use the original versions - ######################################################################## # the Formatter class # see PEP 3101 for details and purpose of this class # The hard parts are reused from the C implementation. They're exposed as "_" -# prefixed methods of str and unicode. +# prefixed methods of str. -# The overall parser is implemented in str._formatter_parser. -# The field name parser is implemented in str._formatter_field_name_split +# The overall parser is implemented in _string.formatter_parser. +# The field name parser is implemented in _string.formatter_field_name_split -class Formatter(object): +class Formatter: def format(self, format_string, *args, **kwargs): return self.vformat(format_string, args, kwargs) @@ -585,7 +220,7 @@ class Formatter(object): def get_value(self, key, args, kwargs): - if isinstance(key, (int, long)): + if isinstance(key, int): return args[key] else: return kwargs[key] @@ -607,6 +242,8 @@ class Formatter(object): return str(value) elif conversion == 'r': return repr(value) + elif conversion == 'a': + return ascii(value) raise ValueError("Unknown conversion specifier {0!s}".format(conversion)) @@ -618,7 +255,7 @@ class Formatter(object): # if field_name is not None, it is looked up, formatted # with format_spec and conversion and then used def parse(self, format_string): - return format_string._formatter_parser() + return _string.formatter_parser(format_string) # given a field_name, find the object it references. @@ -627,7 +264,7 @@ class Formatter(object): # used_args: a set of which args have been used # args, kwargs: as passed in to vformat def get_field(self, field_name, args, kwargs): - first, rest = field_name._formatter_field_name_split() + first, rest = _string.formatter_field_name_split(field_name) obj = self.get_value(first, args, kwargs) |