aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/Lib/encodings/punycode.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/encodings/punycode.py')
-rw-r--r--Lib/encodings/punycode.py57
1 files changed, 28 insertions, 29 deletions
diff --git a/Lib/encodings/punycode.py b/Lib/encodings/punycode.py
index d97200fd35b..66c51013ea4 100644
--- a/Lib/encodings/punycode.py
+++ b/Lib/encodings/punycode.py
@@ -1,7 +1,6 @@
-# -*- coding: iso-8859-1 -*-
""" Codec for the Punicode encoding, as specified in RFC 3492
-Written by Martin v. Löwis.
+Written by Martin v. Löwis.
"""
import codecs
@@ -10,16 +9,15 @@ import codecs
def segregate(str):
"""3.1 Basic code point segregation"""
- base = []
- extended = {}
+ base = bytearray()
+ extended = set()
for c in str:
if ord(c) < 128:
- base.append(c)
+ base.append(ord(c))
else:
- extended[c] = 1
- extended = extended.keys()
- extended.sort()
- return "".join(base).encode("ascii"),extended
+ extended.add(c)
+ extended = sorted(extended)
+ return bytes(base), extended
def selective_len(str, max):
"""Return the length of str, considering only characters below max."""
@@ -76,16 +74,16 @@ def T(j, bias):
if res > 26: return 26
return res
-digits = "abcdefghijklmnopqrstuvwxyz0123456789"
+digits = b"abcdefghijklmnopqrstuvwxyz0123456789"
def generate_generalized_integer(N, bias):
"""3.3 Generalized variable-length integers"""
- result = []
+ result = bytearray()
j = 0
while 1:
t = T(j, bias)
if N < t:
result.append(digits[N])
- return result
+ return bytes(result)
result.append(digits[t + ((N - t) % (36 - t))])
N = (N - t) // (36 - t)
j += 1
@@ -108,21 +106,20 @@ def adapt(delta, first, numchars):
def generate_integers(baselen, deltas):
"""3.4 Bias adaptation"""
# Punycode parameters: initial bias = 72, damp = 700, skew = 38
- result = []
+ result = bytearray()
bias = 72
for points, delta in enumerate(deltas):
s = generate_generalized_integer(delta, bias)
result.extend(s)
bias = adapt(delta, points==0, baselen+points+1)
- return "".join(result)
+ return bytes(result)
def punycode_encode(text):
base, extended = segregate(text)
- base = base.encode("ascii")
deltas = insertion_unsort(text, extended)
extended = generate_integers(len(base), deltas)
if base:
- return base + "-" + extended
+ return base + b"-" + extended
return extended
##################### Decoding #####################################
@@ -137,7 +134,7 @@ def decode_generalized_number(extended, extpos, bias, errors):
char = ord(extended[extpos])
except IndexError:
if errors == "strict":
- raise UnicodeError, "incomplete punicode string"
+ raise UnicodeError("incomplete punicode string")
return extpos + 1, None
extpos += 1
if 0x41 <= char <= 0x5A: # A-Z
@@ -174,37 +171,39 @@ def insertion_sort(base, extended, errors):
char += pos // (len(base) + 1)
if char > 0x10FFFF:
if errors == "strict":
- raise UnicodeError, ("Invalid character U+%x" % char)
+ raise UnicodeError("Invalid character U+%x" % char)
char = ord('?')
pos = pos % (len(base) + 1)
- base = base[:pos] + unichr(char) + base[pos:]
+ base = base[:pos] + chr(char) + base[pos:]
bias = adapt(delta, (extpos == 0), len(base))
extpos = newpos
return base
def punycode_decode(text, errors):
- pos = text.rfind("-")
+ if isinstance(text, str):
+ text = text.encode("ascii")
+ if isinstance(text, memoryview):
+ text = bytes(text)
+ pos = text.rfind(b"-")
if pos == -1:
base = ""
- extended = text
+ extended = str(text, "ascii").upper()
else:
- base = text[:pos]
- extended = text[pos+1:]
- base = unicode(base, "ascii", errors)
- extended = extended.upper()
+ base = str(text[:pos], "ascii", errors)
+ extended = str(text[pos+1:], "ascii").upper()
return insertion_sort(base, extended, errors)
### Codec APIs
class Codec(codecs.Codec):
- def encode(self,input,errors='strict'):
+ def encode(self, input, errors='strict'):
res = punycode_encode(input)
return res, len(input)
- def decode(self,input,errors='strict'):
+ def decode(self, input, errors='strict'):
if errors not in ('strict', 'replace', 'ignore'):
- raise UnicodeError, "Unsupported error handling "+errors
+ raise UnicodeError("Unsupported error handling "+errors)
res = punycode_decode(input, errors)
return res, len(input)
@@ -215,7 +214,7 @@ class IncrementalEncoder(codecs.IncrementalEncoder):
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
if self.errors not in ('strict', 'replace', 'ignore'):
- raise UnicodeError, "Unsupported error handling "+self.errors
+ raise UnicodeError("Unsupported error handling "+self.errors)
return punycode_decode(input, self.errors)
class StreamWriter(Codec,codecs.StreamWriter):