aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/Lib/encodings/__init__.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/encodings/__init__.py')
-rw-r--r--Lib/encodings/__init__.py53
1 files changed, 24 insertions, 29 deletions
diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py
index b85ca823aec..8dd713056ef 100644
--- a/Lib/encodings/__init__.py
+++ b/Lib/encodings/__init__.py
@@ -10,7 +10,7 @@
Each codec module must export the following interface:
* getregentry() -> codecs.CodecInfo object
- The getregentry() API must a CodecInfo object with encoder, decoder,
+ The getregentry() API must return a CodecInfo object with encoder, decoder,
incrementalencoder, incrementaldecoder, streamwriter and streamreader
atttributes which adhere to the Python Codec Interface Standard.
@@ -29,18 +29,11 @@ Written by Marc-Andre Lemburg (mal@lemburg.com).
"""#"
import codecs
-from encodings import aliases
-import __builtin__
+from . import aliases
_cache = {}
_unknown = '--unknown--'
_import_tail = ['*']
-_norm_encoding_map = (' . '
- '0123456789 ABCDEFGHIJKLMNOPQRSTUVWXYZ '
- ' abcdefghijklmnopqrstuvwxyz '
- ' '
- ' '
- ' ')
_aliases = aliases.aliases
class CodecRegistryError(LookupError, SystemError):
@@ -59,14 +52,19 @@ def normalize_encoding(encoding):
non-ASCII characters, these must be Latin-1 compatible.
"""
- # Make sure we have an 8-bit string, because .translate() works
- # differently for Unicode strings.
- if hasattr(__builtin__, "unicode") and isinstance(encoding, unicode):
- # Note that .encode('latin-1') does *not* use the codec
- # registry, so this call doesn't recurse. (See unicodeobject.c
- # PyUnicode_AsEncodedString() for details)
- encoding = encoding.encode('latin-1')
- return '_'.join(encoding.translate(_norm_encoding_map).split())
+ if isinstance(encoding, bytes):
+ encoding = str(encoding, "ascii")
+ chars = []
+ punct = False
+ for c in encoding:
+ if c.isalnum() or c == '.':
+ if punct and chars:
+ chars.append('_')
+ chars.append(c)
+ punct = False
+ else:
+ punct = True
+ return ''.join(chars)
def search_function(encoding):
@@ -120,18 +118,15 @@ def search_function(encoding):
entry = getregentry()
if not isinstance(entry, codecs.CodecInfo):
if not 4 <= len(entry) <= 7:
- raise CodecRegistryError,\
- 'module "%s" (%s) failed to register' % \
- (mod.__name__, mod.__file__)
- if not hasattr(entry[0], '__call__') or \
- not hasattr(entry[1], '__call__') or \
- (entry[2] is not None and not hasattr(entry[2], '__call__')) or \
- (entry[3] is not None and not hasattr(entry[3], '__call__')) or \
- (len(entry) > 4 and entry[4] is not None and not hasattr(entry[4], '__call__')) or \
- (len(entry) > 5 and entry[5] is not None and not hasattr(entry[5], '__call__')):
- raise CodecRegistryError,\
- 'incompatible codecs in module "%s" (%s)' % \
- (mod.__name__, mod.__file__)
+ raise CodecRegistryError('module "%s" (%s) failed to register'
+ % (mod.__name__, mod.__file__))
+ if not callable(entry[0]) or not callable(entry[1]) or \
+ (entry[2] is not None and not callable(entry[2])) or \
+ (entry[3] is not None and not callable(entry[3])) or \
+ (len(entry) > 4 and entry[4] is not None and not callable(entry[4])) or \
+ (len(entry) > 5 and entry[5] is not None and not callable(entry[5])):
+ raise CodecRegistryError('incompatible codecs in module "%s" (%s)'
+ % (mod.__name__, mod.__file__))
if len(entry)<7 or entry[6] is None:
entry += (None,)*(6-len(entry)) + (mod.__name__.split(".", 1)[1],)
entry = codecs.CodecInfo(*entry)