diff options
Diffstat (limited to 'Lib/encodings/punycode.py')
-rw-r--r-- | Lib/encodings/punycode.py | 57 |
1 files changed, 28 insertions, 29 deletions
diff --git a/Lib/encodings/punycode.py b/Lib/encodings/punycode.py index d97200fd35b..66c51013ea4 100644 --- a/Lib/encodings/punycode.py +++ b/Lib/encodings/punycode.py @@ -1,7 +1,6 @@ -# -*- coding: iso-8859-1 -*- """ Codec for the Punicode encoding, as specified in RFC 3492 -Written by Martin v. Löwis. +Written by Martin v. Löwis. """ import codecs @@ -10,16 +9,15 @@ import codecs def segregate(str): """3.1 Basic code point segregation""" - base = [] - extended = {} + base = bytearray() + extended = set() for c in str: if ord(c) < 128: - base.append(c) + base.append(ord(c)) else: - extended[c] = 1 - extended = extended.keys() - extended.sort() - return "".join(base).encode("ascii"),extended + extended.add(c) + extended = sorted(extended) + return bytes(base), extended def selective_len(str, max): """Return the length of str, considering only characters below max.""" @@ -76,16 +74,16 @@ def T(j, bias): if res > 26: return 26 return res -digits = "abcdefghijklmnopqrstuvwxyz0123456789" +digits = b"abcdefghijklmnopqrstuvwxyz0123456789" def generate_generalized_integer(N, bias): """3.3 Generalized variable-length integers""" - result = [] + result = bytearray() j = 0 while 1: t = T(j, bias) if N < t: result.append(digits[N]) - return result + return bytes(result) result.append(digits[t + ((N - t) % (36 - t))]) N = (N - t) // (36 - t) j += 1 @@ -108,21 +106,20 @@ def adapt(delta, first, numchars): def generate_integers(baselen, deltas): """3.4 Bias adaptation""" # Punycode parameters: initial bias = 72, damp = 700, skew = 38 - result = [] + result = bytearray() bias = 72 for points, delta in enumerate(deltas): s = generate_generalized_integer(delta, bias) result.extend(s) bias = adapt(delta, points==0, baselen+points+1) - return "".join(result) + return bytes(result) def punycode_encode(text): base, extended = segregate(text) - base = base.encode("ascii") deltas = insertion_unsort(text, extended) extended = generate_integers(len(base), deltas) if base: - return base + "-" + extended + return base + b"-" + extended return extended ##################### Decoding ##################################### @@ -137,7 +134,7 @@ def decode_generalized_number(extended, extpos, bias, errors): char = ord(extended[extpos]) except IndexError: if errors == "strict": - raise UnicodeError, "incomplete punicode string" + raise UnicodeError("incomplete punicode string") return extpos + 1, None extpos += 1 if 0x41 <= char <= 0x5A: # A-Z @@ -174,37 +171,39 @@ def insertion_sort(base, extended, errors): char += pos // (len(base) + 1) if char > 0x10FFFF: if errors == "strict": - raise UnicodeError, ("Invalid character U+%x" % char) + raise UnicodeError("Invalid character U+%x" % char) char = ord('?') pos = pos % (len(base) + 1) - base = base[:pos] + unichr(char) + base[pos:] + base = base[:pos] + chr(char) + base[pos:] bias = adapt(delta, (extpos == 0), len(base)) extpos = newpos return base def punycode_decode(text, errors): - pos = text.rfind("-") + if isinstance(text, str): + text = text.encode("ascii") + if isinstance(text, memoryview): + text = bytes(text) + pos = text.rfind(b"-") if pos == -1: base = "" - extended = text + extended = str(text, "ascii").upper() else: - base = text[:pos] - extended = text[pos+1:] - base = unicode(base, "ascii", errors) - extended = extended.upper() + base = str(text[:pos], "ascii", errors) + extended = str(text[pos+1:], "ascii").upper() return insertion_sort(base, extended, errors) ### Codec APIs class Codec(codecs.Codec): - def encode(self,input,errors='strict'): + def encode(self, input, errors='strict'): res = punycode_encode(input) return res, len(input) - def decode(self,input,errors='strict'): + def decode(self, input, errors='strict'): if errors not in ('strict', 'replace', 'ignore'): - raise UnicodeError, "Unsupported error handling "+errors + raise UnicodeError("Unsupported error handling "+errors) res = punycode_decode(input, errors) return res, len(input) @@ -215,7 +214,7 @@ class IncrementalEncoder(codecs.IncrementalEncoder): class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): if self.errors not in ('strict', 'replace', 'ignore'): - raise UnicodeError, "Unsupported error handling "+self.errors + raise UnicodeError("Unsupported error handling "+self.errors) return punycode_decode(input, self.errors) class StreamWriter(Codec,codecs.StreamWriter): |