aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/Lib/string.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/string.py')
-rw-r--r--Lib/string.py423
1 files changed, 30 insertions, 393 deletions
diff --git a/Lib/string.py b/Lib/string.py
index 97278034a0f..0f4ede23e14 100644
--- a/Lib/string.py
+++ b/Lib/string.py
@@ -1,43 +1,31 @@
-"""A collection of string operations (most are no longer used).
-
-Warning: most of the code you see here isn't normally used nowadays.
-Beginning with Python 1.6, many of these functions are implemented as
-methods on the standard string object. They used to be implemented by
-a built-in module called strop, but strop is now obsolete itself.
+"""A collection of string constants.
Public module variables:
-whitespace -- a string containing all characters considered whitespace
-lowercase -- a string containing all characters considered lowercase letters
-uppercase -- a string containing all characters considered uppercase letters
-letters -- a string containing all characters considered letters
-digits -- a string containing all characters considered decimal digits
-hexdigits -- a string containing all characters considered hexadecimal digits
-octdigits -- a string containing all characters considered octal digits
-punctuation -- a string containing all characters considered punctuation
-printable -- a string containing all characters considered printable
+whitespace -- a string containing all ASCII whitespace
+ascii_lowercase -- a string containing all ASCII lowercase letters
+ascii_uppercase -- a string containing all ASCII uppercase letters
+ascii_letters -- a string containing all ASCII letters
+digits -- a string containing all ASCII decimal digits
+hexdigits -- a string containing all ASCII hexadecimal digits
+octdigits -- a string containing all ASCII octal digits
+punctuation -- a string containing all ASCII punctuation characters
+printable -- a string containing all ASCII characters considered printable
"""
+import _string
+
# Some strings for ctype-style character classification
whitespace = ' \t\n\r\v\f'
-lowercase = 'abcdefghijklmnopqrstuvwxyz'
-uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
-letters = lowercase + uppercase
-ascii_lowercase = lowercase
-ascii_uppercase = uppercase
+ascii_lowercase = 'abcdefghijklmnopqrstuvwxyz'
+ascii_uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
ascii_letters = ascii_lowercase + ascii_uppercase
digits = '0123456789'
hexdigits = digits + 'abcdef' + 'ABCDEF'
octdigits = '01234567'
punctuation = """!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
-printable = digits + letters + punctuation + whitespace
-
-# Case conversion helpers
-# Use str to convert Unicode literal in case of -U
-l = map(chr, xrange(256))
-_idmap = str('').join(l)
-del l
+printable = digits + ascii_letters + punctuation + whitespace
# Functions which aren't available as string methods.
@@ -56,29 +44,6 @@ def capwords(s, sep=None):
return (sep or ' ').join(x.capitalize() for x in s.split(sep))
-# Construct a translation string
-_idmapL = None
-def maketrans(fromstr, tostr):
- """maketrans(frm, to) -> string
-
- Return a translation table (a string of 256 bytes long)
- suitable for use in string.translate. The strings frm and to
- must be of the same length.
-
- """
- if len(fromstr) != len(tostr):
- raise ValueError, "maketrans arguments must have same length"
- global _idmapL
- if not _idmapL:
- _idmapL = list(_idmap)
- L = _idmapL[:]
- fromstr = map(ord, fromstr)
- for i in range(len(fromstr)):
- L[fromstr[i]] = tostr[i]
- return ''.join(L)
-
-
-
####################################################################
import re as _re
@@ -118,15 +83,15 @@ class _TemplateMetaclass(type):
'delim' : _re.escape(cls.delimiter),
'id' : cls.idpattern,
}
- cls.pattern = _re.compile(pattern, _re.IGNORECASE | _re.VERBOSE)
+ cls.pattern = _re.compile(pattern, cls.flags | _re.VERBOSE)
-class Template:
+class Template(metaclass=_TemplateMetaclass):
"""A string class for supporting $-substitutions."""
- __metaclass__ = _TemplateMetaclass
delimiter = '$'
idpattern = r'[_a-z][_a-z0-9]*'
+ flags = _re.IGNORECASE
def __init__(self, template):
self.template = template
@@ -182,365 +147,35 @@ class Template:
mapping = args[0]
# Helper function for .sub()
def convert(mo):
- named = mo.group('named')
+ named = mo.group('named') or mo.group('braced')
if named is not None:
try:
# We use this idiom instead of str() because the latter
# will fail if val is a Unicode containing non-ASCII
return '%s' % (mapping[named],)
except KeyError:
- return self.delimiter + named
- braced = mo.group('braced')
- if braced is not None:
- try:
- return '%s' % (mapping[braced],)
- except KeyError:
- return self.delimiter + '{' + braced + '}'
+ return mo.group()
if mo.group('escaped') is not None:
return self.delimiter
if mo.group('invalid') is not None:
- return self.delimiter
+ return mo.group()
raise ValueError('Unrecognized named group in pattern',
self.pattern)
return self.pattern.sub(convert, self.template)
-####################################################################
-# NOTE: Everything below here is deprecated. Use string methods instead.
-# This stuff will go away in Python 3.0.
-
-# Backward compatible names for exceptions
-index_error = ValueError
-atoi_error = ValueError
-atof_error = ValueError
-atol_error = ValueError
-
-# convert UPPER CASE letters to lower case
-def lower(s):
- """lower(s) -> string
-
- Return a copy of the string s converted to lowercase.
-
- """
- return s.lower()
-
-# Convert lower case letters to UPPER CASE
-def upper(s):
- """upper(s) -> string
-
- Return a copy of the string s converted to uppercase.
-
- """
- return s.upper()
-
-# Swap lower case letters and UPPER CASE
-def swapcase(s):
- """swapcase(s) -> string
-
- Return a copy of the string s with upper case characters
- converted to lowercase and vice versa.
-
- """
- return s.swapcase()
-
-# Strip leading and trailing tabs and spaces
-def strip(s, chars=None):
- """strip(s [,chars]) -> string
-
- Return a copy of the string s with leading and trailing
- whitespace removed.
- If chars is given and not None, remove characters in chars instead.
- If chars is unicode, S will be converted to unicode before stripping.
-
- """
- return s.strip(chars)
-
-# Strip leading tabs and spaces
-def lstrip(s, chars=None):
- """lstrip(s [,chars]) -> string
-
- Return a copy of the string s with leading whitespace removed.
- If chars is given and not None, remove characters in chars instead.
-
- """
- return s.lstrip(chars)
-
-# Strip trailing tabs and spaces
-def rstrip(s, chars=None):
- """rstrip(s [,chars]) -> string
-
- Return a copy of the string s with trailing whitespace removed.
- If chars is given and not None, remove characters in chars instead.
-
- """
- return s.rstrip(chars)
-
-
-# Split a string into a list of space/tab-separated words
-def split(s, sep=None, maxsplit=-1):
- """split(s [,sep [,maxsplit]]) -> list of strings
-
- Return a list of the words in the string s, using sep as the
- delimiter string. If maxsplit is given, splits at no more than
- maxsplit places (resulting in at most maxsplit+1 words). If sep
- is not specified or is None, any whitespace string is a separator.
-
- (split and splitfields are synonymous)
-
- """
- return s.split(sep, maxsplit)
-splitfields = split
-
-# Split a string into a list of space/tab-separated words
-def rsplit(s, sep=None, maxsplit=-1):
- """rsplit(s [,sep [,maxsplit]]) -> list of strings
-
- Return a list of the words in the string s, using sep as the
- delimiter string, starting at the end of the string and working
- to the front. If maxsplit is given, at most maxsplit splits are
- done. If sep is not specified or is None, any whitespace string
- is a separator.
- """
- return s.rsplit(sep, maxsplit)
-
-# Join fields with optional separator
-def join(words, sep = ' '):
- """join(list [,sep]) -> string
-
- Return a string composed of the words in list, with
- intervening occurrences of sep. The default separator is a
- single space.
-
- (joinfields and join are synonymous)
-
- """
- return sep.join(words)
-joinfields = join
-
-# Find substring, raise exception if not found
-def index(s, *args):
- """index(s, sub [,start [,end]]) -> int
-
- Like find but raises ValueError when the substring is not found.
-
- """
- return s.index(*args)
-
-# Find last substring, raise exception if not found
-def rindex(s, *args):
- """rindex(s, sub [,start [,end]]) -> int
-
- Like rfind but raises ValueError when the substring is not found.
-
- """
- return s.rindex(*args)
-
-# Count non-overlapping occurrences of substring
-def count(s, *args):
- """count(s, sub[, start[,end]]) -> int
-
- Return the number of occurrences of substring sub in string
- s[start:end]. Optional arguments start and end are
- interpreted as in slice notation.
-
- """
- return s.count(*args)
-
-# Find substring, return -1 if not found
-def find(s, *args):
- """find(s, sub [,start [,end]]) -> in
-
- Return the lowest index in s where substring sub is found,
- such that sub is contained within s[start,end]. Optional
- arguments start and end are interpreted as in slice notation.
-
- Return -1 on failure.
-
- """
- return s.find(*args)
-
-# Find last substring, return -1 if not found
-def rfind(s, *args):
- """rfind(s, sub [,start [,end]]) -> int
-
- Return the highest index in s where substring sub is found,
- such that sub is contained within s[start,end]. Optional
- arguments start and end are interpreted as in slice notation.
-
- Return -1 on failure.
-
- """
- return s.rfind(*args)
-
-# for a bit of speed
-_float = float
-_int = int
-_long = long
-
-# Convert string to float
-def atof(s):
- """atof(s) -> float
-
- Return the floating point number represented by the string s.
-
- """
- return _float(s)
-
-
-# Convert string to integer
-def atoi(s , base=10):
- """atoi(s [,base]) -> int
-
- Return the integer represented by the string s in the given
- base, which defaults to 10. The string s must consist of one
- or more digits, possibly preceded by a sign. If base is 0, it
- is chosen from the leading characters of s, 0 for octal, 0x or
- 0X for hexadecimal. If base is 16, a preceding 0x or 0X is
- accepted.
-
- """
- return _int(s, base)
-
-
-# Convert string to long integer
-def atol(s, base=10):
- """atol(s [,base]) -> long
-
- Return the long integer represented by the string s in the
- given base, which defaults to 10. The string s must consist
- of one or more digits, possibly preceded by a sign. If base
- is 0, it is chosen from the leading characters of s, 0 for
- octal, 0x or 0X for hexadecimal. If base is 16, a preceding
- 0x or 0X is accepted. A trailing L or l is not accepted,
- unless base is 0.
-
- """
- return _long(s, base)
-
-
-# Left-justify a string
-def ljust(s, width, *args):
- """ljust(s, width[, fillchar]) -> string
-
- Return a left-justified version of s, in a field of the
- specified width, padded with spaces as needed. The string is
- never truncated. If specified the fillchar is used instead of spaces.
-
- """
- return s.ljust(width, *args)
-
-# Right-justify a string
-def rjust(s, width, *args):
- """rjust(s, width[, fillchar]) -> string
-
- Return a right-justified version of s, in a field of the
- specified width, padded with spaces as needed. The string is
- never truncated. If specified the fillchar is used instead of spaces.
-
- """
- return s.rjust(width, *args)
-
-# Center a string
-def center(s, width, *args):
- """center(s, width[, fillchar]) -> string
-
- Return a center version of s, in a field of the specified
- width. padded with spaces as needed. The string is never
- truncated. If specified the fillchar is used instead of spaces.
-
- """
- return s.center(width, *args)
-
-# Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
-# Decadent feature: the argument may be a string or a number
-# (Use of this is deprecated; it should be a string as with ljust c.s.)
-def zfill(x, width):
- """zfill(x, width) -> string
-
- Pad a numeric string x with zeros on the left, to fill a field
- of the specified width. The string x is never truncated.
-
- """
- if not isinstance(x, basestring):
- x = repr(x)
- return x.zfill(width)
-
-# Expand tabs in a string.
-# Doesn't take non-printing chars into account, but does understand \n.
-def expandtabs(s, tabsize=8):
- """expandtabs(s [,tabsize]) -> string
-
- Return a copy of the string s with all tab characters replaced
- by the appropriate number of spaces, depending on the current
- column, and the tabsize (default 8).
-
- """
- return s.expandtabs(tabsize)
-
-# Character translation through look-up table.
-def translate(s, table, deletions=""):
- """translate(s,table [,deletions]) -> string
-
- Return a copy of the string s, where all characters occurring
- in the optional argument deletions are removed, and the
- remaining characters have been mapped through the given
- translation table, which must be a string of length 256. The
- deletions argument is not allowed for Unicode strings.
-
- """
- if deletions or table is None:
- return s.translate(table, deletions)
- else:
- # Add s[:0] so that if s is Unicode and table is an 8-bit string,
- # table is converted to Unicode. This means that table *cannot*
- # be a dictionary -- for that feature, use u.translate() directly.
- return s.translate(table + s[:0])
-
-# Capitalize a string, e.g. "aBc dEf" -> "Abc def".
-def capitalize(s):
- """capitalize(s) -> string
-
- Return a copy of the string s with only its first character
- capitalized.
-
- """
- return s.capitalize()
-
-# Substring replacement (global)
-def replace(s, old, new, maxreplace=-1):
- """replace (str, old, new[, maxreplace]) -> string
-
- Return a copy of string str with all occurrences of substring
- old replaced by new. If the optional argument maxreplace is
- given, only the first maxreplace occurrences are replaced.
-
- """
- return s.replace(old, new, maxreplace)
-
-
-# Try importing optional built-in module "strop" -- if it exists,
-# it redefines some string operations that are 100-1000 times faster.
-# It also defines values for whitespace, lowercase and uppercase
-# that match <ctype.h>'s definitions.
-
-try:
- from strop import maketrans, lowercase, uppercase, whitespace
- letters = lowercase + uppercase
-except ImportError:
- pass # Use the original versions
-
########################################################################
# the Formatter class
# see PEP 3101 for details and purpose of this class
# The hard parts are reused from the C implementation. They're exposed as "_"
-# prefixed methods of str and unicode.
+# prefixed methods of str.
-# The overall parser is implemented in str._formatter_parser.
-# The field name parser is implemented in str._formatter_field_name_split
+# The overall parser is implemented in _string.formatter_parser.
+# The field name parser is implemented in _string.formatter_field_name_split
-class Formatter(object):
+class Formatter:
def format(self, format_string, *args, **kwargs):
return self.vformat(format_string, args, kwargs)
@@ -585,7 +220,7 @@ class Formatter(object):
def get_value(self, key, args, kwargs):
- if isinstance(key, (int, long)):
+ if isinstance(key, int):
return args[key]
else:
return kwargs[key]
@@ -607,6 +242,8 @@ class Formatter(object):
return str(value)
elif conversion == 'r':
return repr(value)
+ elif conversion == 'a':
+ return ascii(value)
raise ValueError("Unknown conversion specifier {0!s}".format(conversion))
@@ -618,7 +255,7 @@ class Formatter(object):
# if field_name is not None, it is looked up, formatted
# with format_spec and conversion and then used
def parse(self, format_string):
- return format_string._formatter_parser()
+ return _string.formatter_parser(format_string)
# given a field_name, find the object it references.
@@ -627,7 +264,7 @@ class Formatter(object):
# used_args: a set of which args have been used
# args, kwargs: as passed in to vformat
def get_field(self, field_name, args, kwargs):
- first, rest = field_name._formatter_field_name_split()
+ first, rest = _string.formatter_field_name_split(field_name)
obj = self.get_value(first, args, kwargs)